Introduction

We look for genes associated with fibroblast activation in several single-cell datasets.

Preliminaries

>

Setup

Packages

suppressPackageStartupMessages({
  # install.packages("BiocManager")

  # install.packages("kableExtra")
  requireNamespace("kableExtra")

  # ?
  # devtools::install_github("rstudio/crosstalk")
  # library("crosstalk")
  # library("htmlwidgets")

  # install.packages("crunch")
  # requireNamespace("crunch")

  # https://rstudio.github.io/DT/
  # install.packages("DT")
  requireNamespace("DT")

  # install.packages("RCurl")
  requireNamespace("RCurl")

  # install.packages("memoise")
  requireNamespace("memoise")

  # install.packages("dplyr")
  library(dplyr)

  # install.packages("ggplot2")
  library(ggplot2)

  # install.packages("ggsci")
  requireNamespace("ggsci")
  # install.packages("ggtext")
  requireNamespace("ggtext")
  # install.packages("ggrepel")
  requireNamespace("ggrepel")

  # install.packages("assertthat")
  requireNamespace("assertthat")

  #
  requireNamespace("magrittr")
  `%<>%` <- magrittr::`%<>%`
  `%>%` <- magrittr::`%>%`

  # install.packages("glue")
  requireNamespace("glue")
  glue <- glue::glue

  # install.packages("tidyr")
  requireNamespace("tidyr")

  # BiocManager::install("org.Mm.eg.db")
  # BiocManager::install("org.Hs.eg.db")
  requireNamespace("org.Mm.eg.db")
  requireNamespace("org.Hs.eg.db")

  # BiocManager::install("GO.db")
  requireNamespace("GO.db")

  # https://www.bioconductor.org/packages/release/bioc/vignettes/goseq/inst/doc/goseq.pdf
  # BiocManager::install("goseq")
  # requireNamespace("goseq")

  # install.packages("pracma")
  requireNamespace("pracma")

  # https://vroom.r-lib.org/articles/vroom.html
  # install.packages("vroom")
  requireNamespace("vroom")

  # BiocManager::install("DESeq2")
  requireNamespace("DESeq2")

  # BiocManager::install("scater")
  requireNamespace("scater")

  # BiocManager::install("scran")
  # requireNamespace("scran")

  # BiocManager::install("igraph")
  requireNamespace("igraph")

  # install.packages("pheatmap")
  requireNamespace("pheatmap")

  # install.packages("gridExtra")
  requireNamespace("gridExtra")

  # install.packages("pathlibr")
  requireNamespace("pathlibr")
  # install.packages("stringr")
  requireNamespace("stringr")
  # install.packages("reshape2")
  requireNamespace("reshape2")

  # install.packages("pbapply")
  # requireNamespace("pbapply")

  # install.packages("Rtsne")
  requireNamespace("Rtsne")
  # install.packages("uwot")
  requireNamespace("uwot")

  # avoid importing `exprs` that leads to clashes
  requireNamespace("rlang")

  # install.packages("Seurat")
  # requireNamespace("Seurat")

  # BiocManager::install("TrajectoryUtils") # Didn't work
  # devtools::install_github("LTLA/TrajectoryUtils@e943606")
  # BiocManager::install("kstreet13/slingshot@25fc566")
  # requireNamespace("slingshot")

  # BiocManager::install("tradeSeq")
  # requireNamespace("tradeSeq")

  # install.packages("statmod")

  # BiocManager::install("edgeR")
  requireNamespace("edgeR")

  # https://github.com/eliocamp/ggnewscale
  # install.packages("ggnewscale")
  # requireNamespace("ggnewscale")

  # https://www.rdocumentation.org/packages/DGCA/versions/1.0.2
  # BiocManager::install("impute")
  # BiocManager::install("preprocessCore")
  # BiocManager::install("GO.db")
  # install.packages("WGCNA")
  # install.packages("DGCA")
  requireNamespace("DGCA")


  # https://github.com/danielschw188/Revelio
  # devtools::install_github('danielschw188/Revelio@e8e1492')
  requireNamespace("Revelio")

  # # install.packages("rgl")
  # requireNamespace("rgl")

  # install.packages("plotly")
  requireNamespace("plotly")

  # https://www.rdocumentation.org/packages/Hmisc/versions/4.5-0/topics/rcorr
  # install.packages("Hmisc")
  requireNamespace("Hmisc")

  # BiocManager::install("multiGSEA")
  # requireNamespace("multiGSEA")

  # install.packages("tidygraph")
  # install.packages("ggraph")
  requireNamespace("ggraph")
  requireNamespace("tidygraph")

  # https://htmlpreview.github.io/?https://github.com/aertslab/SCENIC/blob/master/inst/doc/SCENIC_Setup.html
  # BiocManager::install(c("AUCell", "RcisTarget"))
  # BiocManager::install(c("GRNBoost"))
  # BiocManager::install(c("doMC", "doRNG"))
  # devtools::install_github("aertslab/SCENIC@0585e87")
  # requireNamespace("SCENIC")
})
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/glue/libs/glue.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/xml2/libs/xml2.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/colorspace/libs/colorspace.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/systemfonts/libs/systemfonts.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/svglite/libs/svglite.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/bitops/libs/bitops.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/RCurl/libs/RCurl.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/fastmap/libs/fastmap.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/cachem/libs/cachem.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/vctrs/libs/vctrs.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/ellipsis/libs/ellipsis.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/fansi/libs/fansi.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/utf8/libs/utf8.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/tibble/libs/tibble.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/purrr/libs/purrr.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/dplyr/libs/dplyr.so") ...
## now dyn.load("/usr/lib/R/library/grid/libs/grid.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Rcpp/libs/Rcpp.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/gridtext/libs/gridtext.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/ggrepel/libs/ggrepel.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/tidyr/libs/tidyr.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/bit/libs/bit.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/bit64/libs/bit64.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/RSQLite/libs/RSQLite.so") ...
## now dyn.load("/usr/lib/R/library/parallel/libs/parallel.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Biobase/libs/Biobase.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/S4Vectors/libs/S4Vectors.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/IRanges/libs/IRanges.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/png/libs/png.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/XVector/libs/XVector.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Biostrings/libs/Biostrings.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/vroom/libs/vroom.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/tzdb/libs/tzdb.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/BiocParallel/libs/BiocParallel.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/GenomicRanges/libs/GenomicRanges.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/matrixStats/libs/matrixStats.so") ...
## now dyn.load("/usr/lib/R/library/lattice/libs/lattice.so") ...
## now dyn.load("/usr/lib/R/library/Matrix/libs/Matrix.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/DelayedArray/libs/DelayedArray.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/XML/libs/XML.so") ...
## now dyn.load("/usr/lib/R/library/splines/libs/splines.so") ...
## now dyn.load("/usr/lib/R/library/survival/libs/survival.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/genefilter/libs/genefilter.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/locfit/libs/locfit.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/DESeq2/libs/DESeq2.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/sparseMatrixStats/libs/sparseMatrixStats.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/beachmat/libs/beachmat.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/scuttle/libs/scuttle.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/BiocNeighbors/libs/BiocNeighbors.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/irlba/libs/irlba.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/BiocSingular/libs/BiocSingular.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/beeswarm/libs/beeswarm.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/igraph/libs/igraph.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/plyr/libs/plyr.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/reshape2/libs/reshape2.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Rtsne/libs/Rtsne.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/uwot/libs/uwot.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/limma/libs/limma.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/edgeR/libs/edgeR.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/fastcluster/libs/fastcluster.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/base64enc/libs/base64enc.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/jpeg/libs/jpeg.so") ...
## now dyn.load("/usr/lib/R/library/cluster/libs/cluster.so") ...
## now dyn.load("/usr/lib/R/library/foreign/libs/foreign.so") ...
## now dyn.load("/usr/lib/R/library/nnet/libs/nnet.so") ...
## now dyn.load("/usr/lib/R/library/rpart/libs/rpart.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/checkmate/libs/checkmate.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/backports/libs/backports.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/data.table/libs/datatable.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Hmisc/libs/Hmisc.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/impute/libs/impute.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/preprocessCore/libs/preprocessCore.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/WGCNA/libs/WGCNA.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/lazyeval/libs/lazyeval.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/tidygraph/libs/tidygraph.so") ...

Session

sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8    LC_NUMERIC=C            LC_TIME=lzh_TW         
##  [4] LC_COLLATE=en_US.UTF-8  LC_MONETARY=lzh_TW      LC_MESSAGES=en_US.UTF-8
##  [7] LC_PAPER=lzh_TW         LC_NAME=C               LC_ADDRESS=C           
## [10] LC_TELEPHONE=C          LC_MEASUREMENT=lzh_TW   LC_IDENTIFICATION=C    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggplot2_3.3.5 dplyr_1.0.7  
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.2.1             Hmisc_4.5-0                
##   [3] systemfonts_1.0.2           plyr_1.8.6                 
##   [5] igraph_1.2.6                lazyeval_0.2.2             
##   [7] splines_4.1.0               Revelio_0.1.0              
##   [9] BiocParallel_1.26.1         GenomeInfoDb_1.28.1        
##  [11] scater_1.20.1               digest_0.6.27              
##  [13] foreach_1.5.1               htmltools_0.5.1.1          
##  [15] viridis_0.6.1               GO.db_3.13.0               
##  [17] fansi_0.5.0                 checkmate_2.0.0            
##  [19] magrittr_2.0.1              memoise_2.0.0              
##  [21] ScaledMatrix_1.0.0          cluster_2.1.2              
##  [23] doParallel_1.0.16           tzdb_0.1.1                 
##  [25] limma_3.48.1                fastcluster_1.2.3          
##  [27] Biostrings_2.60.1           annotate_1.70.0            
##  [29] matrixStats_0.59.0          vroom_1.5.1                
##  [31] svglite_2.0.0               jpeg_0.1-8.1               
##  [33] colorspace_2.0-2            blob_1.2.1                 
##  [35] rvest_1.0.0                 ggrepel_0.9.1              
##  [37] xfun_0.24                   crayon_1.4.1               
##  [39] RCurl_1.98-1.3              jsonlite_1.7.2             
##  [41] org.Mm.eg.db_3.13.0         genefilter_1.74.0          
##  [43] impute_1.66.0               survival_3.2-11            
##  [45] iterators_1.0.13            glue_1.4.2                 
##  [47] kableExtra_1.3.4            gtable_0.3.0               
##  [49] zlibbioc_1.38.0             XVector_0.32.0             
##  [51] webshot_0.5.2               DelayedArray_0.18.0        
##  [53] BiocSingular_1.8.1          SingleCellExperiment_1.14.1
##  [55] BiocGenerics_0.38.0         scales_1.1.1               
##  [57] pheatmap_1.0.12             DBI_1.1.1                  
##  [59] edgeR_3.34.0                Rcpp_1.0.6                 
##  [61] htmlTable_2.2.1             viridisLite_0.4.0          
##  [63] xtable_1.8-4                gridtext_0.1.4             
##  [65] foreign_0.8-81              bit_4.0.4                  
##  [67] rsvd_1.0.5                  preprocessCore_1.54.0      
##  [69] Formula_1.2-4               stats4_4.1.0               
##  [71] DT_0.18                     htmlwidgets_1.5.3          
##  [73] httr_1.4.2                  RColorBrewer_1.1-2         
##  [75] ellipsis_0.3.2              pkgconfig_2.0.3            
##  [77] XML_3.99-0.6                scuttle_1.2.0              
##  [79] nnet_7.3-16                 sass_0.4.0                 
##  [81] uwot_0.1.10                 locfit_1.5-9.4             
##  [83] utf8_1.2.1                  dynamicTreeCut_1.63-1      
##  [85] tidyselect_1.1.1            rlang_0.4.11               
##  [87] reshape2_1.4.4              AnnotationDbi_1.54.1       
##  [89] munsell_0.5.0               tools_4.1.0                
##  [91] cachem_1.0.5                generics_0.1.0             
##  [93] RSQLite_2.2.7               evaluate_0.14              
##  [95] stringr_1.4.0               fastmap_1.1.0              
##  [97] yaml_2.2.1                  org.Hs.eg.db_3.13.0        
##  [99] knitr_1.33                  bit64_4.0.5                
## [101] tidygraph_1.2.0             purrr_0.3.4                
## [103] KEGGREST_1.32.0             sparseMatrixStats_1.4.0    
## [105] pracma_2.3.3                xml2_1.3.2                 
## [107] compiler_4.1.0              rstudioapi_0.13            
## [109] plotly_4.9.4.1              beeswarm_0.4.0             
## [111] png_0.1-7                   tibble_3.1.2               
## [113] geneplotter_1.70.0          bslib_0.2.5.1              
## [115] stringi_1.6.2               lattice_0.20-44            
## [117] DGCA_1.0.2                  Matrix_1.3-4               
## [119] ggsci_2.9                   vctrs_0.3.8                
## [121] pillar_1.6.1                lifecycle_1.0.0            
## [123] jquerylib_0.1.4             BiocNeighbors_1.10.0       
## [125] data.table_1.14.0           bitops_1.0-7               
## [127] irlba_2.3.3                 GenomicRanges_1.44.0       
## [129] latticeExtra_0.6-29         R6_2.5.0                   
## [131] gridExtra_2.3               vipor_0.4.5                
## [133] IRanges_2.26.0              codetools_0.2-18           
## [135] assertthat_0.2.1            SummarizedExperiment_1.22.0
## [137] DESeq2_1.32.0               withr_2.4.2                
## [139] S4Vectors_0.30.0            GenomeInfoDbData_1.2.6     
## [141] parallel_4.1.0              ggtext_0.1.1               
## [143] rpart_4.1-15                grid_4.1.0                 
## [145] pathlibr_0.1.0              beachmat_2.8.0             
## [147] tidyr_1.1.3                 rmarkdown_2.9              
## [149] DelayedMatrixStats_1.14.0   MatrixGenerics_1.4.0       
## [151] Rtsne_0.15                  Biobase_2.52.0             
## [153] WGCNA_1.70-3                base64enc_0.1-3            
## [155] ggbeeswarm_0.6.0

Generic

RNG
set.seed(43)
Dual of the python star-star operator, sort of
dual <- function(f, g) {
  if (missing(g)) {
    stopifnot(class(f) == "function")
    (function(L) do.call(f, Filter(Negate(is.null), L)))
  } else {
    stopifnot(class(f) == "list")
    stopifnot(class(g) == "function")
    dual(g)(f)
  }
}

Example 1:

list(
  biggle = list(A = "hello", B = "world"),
  wiggle = list(A = "Hello", B = "World", C = NULL)
) %>%
  lapply(dual(function(B, A) paste(A, B)))
## $biggle
## [1] "hello world"
## 
## $wiggle
## [1] "Hello World"

Example 2:

list(
  A = data.frame(x = c("xa1", "xa2"), y = c("ya1", "ya2")),
  B = data.frame(x = c("xb1", "xb2"), y = c("yb1", "yb2")),
  C = data.frame(x = c("xc1", "xc2"), y = c("yc1", "yc2"))
) %>%
  # No need for the brackets!
  dual(cbind)
##   A.x A.y B.x B.y C.x C.y
## 1 xa1 ya1 xb1 yb1 xc1 yc1
## 2 xa2 ya2 xb2 yb2 xc2 yc2
Memory drain culprits
culprits <- function() {
  search()[[1]] %>%
    ls(name = .) %>%
    sapply(. %>% get() %>% object.size() %>% "/"(1e6)) %>%
    data.frame(size = .) %>%
    arrange(size) %>%
    mutate(cumsize = cumsum(size)) %>%
    arrange(desc(size)) %>%
    mutate(across(everything(), ~ signif(.x, digits = 2))) %>%
    mutate(unit = "Mb")
}

Example:

culprits() %>% head(3)
##           size cumsize unit
## dual     0.074   0.120   Mb
## glue     0.022   0.048   Mb
## culprits 0.011   0.026   Mb

Paths

Root
for (i in 1:2) {
  BASEPATH <- pathlibr::Path$new(Sys.glob(paste(c("work", ".."), "*FibroAct", sep = "/")))
  setwd(BASEPATH$show)
}

print(paste("Work path:", BASEPATH))
## [1] "Work path: ../20210502-FibroAct"
stopifnot("main.Rmd" %in% names(BASEPATH$dir))
Finding paths
path_to <- (function(.) Sys.glob(BASEPATH$join("../..")$join(.)$show))
out_file <- (function(.) BASEPATH$join("output")$join(.)$show)
dir.create(out_file(""), showWarnings = FALSE)
Memoization to disk
memo <- function(f) {
  memoise::memoise(
    f,
    cache = cachem::cache_disk(BASEPATH$join("main_cache/memoized")$show)
  )
}

Tables

Basic table printing.

kable <- function(.) {
  kableExtra::kbl(., align = "c") %>%
    kableExtra::kable_paper("hover", full_width = FALSE)
}

Options for fancy datatable printing.

options(DT.options = list(
  pageLength = 16,
  language = list(search = "Filter:"),
  scrollX = TRUE
))

Converting a symbol or GO ID to a link.

as.link <-
  function(x) {
    dplyr::case_when(
      # If x is like GO:003453
      startsWith(x, "GO:") ~ paste0(
        "<a target='_blank'",
        "href='", "http://amigo.geneontology.org/amigo/term/", x, "'",
        ">", x, "</a>"
      ),
      # Otherwise try this
      TRUE ~ paste0(
        "<a target='_blank'",
        "href='", "http://www.informatics.jax.org/marker/summary?nomen=", x, "'",
        ">", x, "</a>"
      )
    )
  }

Example

data.frame(x = c("GO:00345", "Lama")) %>%
  mutate(link = as.link(x)) %>%
  DT::datatable(width = "100%")

Printing a sequence of tables

kable_all <- function(tt) {
  for (t in tt) {
    t %>%
      table() %>%
      t() %>%
      kable() %>%
      print()
  }
}
# with results='asis'
list(
  a = c(c(1:5), c(2:5), c(3:5)),
  b = c(c(1:6), c(2:6), c(3:6)),
  b = c(c(1:7), c(2:7), c(3:7)),
  b = c(c(1:9), c(2:9), c(3:9))
) %>% kable_all()
1 2 3 4 5
1 2 3 3 3
1 2 3 4 5 6
1 2 3 3 3 3
1 2 3 4 5 6 7
1 2 3 3 3 3 3
1 2 3 4 5 6 7 8 9
1 2 3 3 3 3 3 3 3

Plotting

ggplot2::theme_set(theme_light(base_size = 15))
gglast <- function(what) {
  p <- ggplot2::last_plot()
  q <- p$mapping[[deparse(substitute(what))]]
  rlang::eval_tidy(q, data = p$data)
}

# example: scale_fill_manual(values = color_map, breaks = gglast(fill))
# Converts an annotation column (e.g. sce$group)
# to a named vector of colors (group -> color)
# List of palettes: grDevices::hcl.pals()
# http://colorspace.r-forge.r-project.org/reference/hcl_palettes.html
ramp <- function(anno, palette = "Temps") {
  anno %>%
    as.character() %>%
    unique() %>%
    c(NA) %>%
    data.frame(
      item = .,
      # https://developer.r-project.org/Blog/public/2019/04/01/hcl-based-color-palettes-in-grdevices/
      color = length(.) %>% colorRampPalette(grDevices::hcl.colors(n = ., palette = palette))(.)
    ) %>%
    tidyr::drop_na() %>%
    pull(color, item)
}

Example:

ramp(c("Grp1", "Grp2", "Grp1"), palette = "Dark 3") %>%
  t() %>%
  kable()
Grp1 Grp2
#E16A86 #50A315
RGB <- function(arr, alpha = 1) {
  stopifnot(ncol(arr) == 3)

  arr %>%
    as.data.frame() %>%
    `/`(255) %>%
    mutate(alpha = alpha) %>%
    mutate(colors = apply(., 1, \(x) rgb(x[1], x[2], x[3], alpha = x[["alpha"]]))) %>%
    pull(colors)
}

Example:

(1:7) %>%
  percent_rank() %>%
  (colorRamp(c("blue", "red"))) %>%
  RGB(alpha = 0.5) %>%
  t() %>%
  kable()
#0000FF80 #2B00D580 #5500AA80 #80008080 #AA005580 #D5002A80 #FF000080

Colors for consistency; may be overwritten below.

colors <- unlist(
  c(
    list(
      `374_VLMC` = "aquamarine3",
      `375_VLMC` = "chartreuse4",
      `376_VLMC` = "chartreuse3",
      `FB1` = "cornflowerblue",
      `FB2` = "blue",
      `Ctrl` = "chartreuse4",
      `EAE` = "coral2",
      `VLMC1` = "chartreuse1",
      `VLMC3` = "coral3",
      `VLMC4` = "coral4",
      `healthy` = "green",
      `EAE1` = "coral1",
      `EAE2` = "coral3",
      `healthy1` = "aquamarine",
      `healthy2` = "chartreuse3",
      `healthy3` = "chartreuse4",
      `positive` = "Red4",
      `negative` = "Steelblue1"
    ),
    ramp(c("G1.S", "S", "G2", "G2.M", "M.G1"), palette = "Dark 3")
  )
)
colors %>%
  unlist() %>%
  data.frame(c = .) %>%
  mutate(item = rownames(.)) %>%
  ggplot(aes(y = item, x = 1)) +
  geom_tile(fill = colors, stat = "identity") +
  scale_y_discrete(limits = rev(names(colors))) +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title = element_blank()
  )
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/farver/libs/farver.so") ...

Zoomable figures.

///*
$(document).ready(
  function() {
    $("img.zoomable").on("click", function(evt) {
      if (!evt.originalEvent.shiftKey && evt.originalEvent.altKey) {
        var el = $(this).clone();
        el.attr("width", "2048px");
        window.open().document.write(el.wrap('<p>').parent().html());
        return false;
      }
    });
  }
);
//*/

Alt+click on a figure to zoom:

list(
  randu %>% ggplot(aes(x = x, y = y)) +
    geom_point() +
    ggtitle("Zoomable 1"),
  randu %>% ggplot(aes(x = x, y = z)) +
    geom_point() +
    ggtitle("Zoomable 2")
)

Click-scrollable figures (like slides):

$(document).ready(
  function() {
    $("img.scrollable").css({
      'position': 'absolute', 'top': 0, 'left': 0, 'z-index': 100000,
    });
    
    var parent = $("img.scrollable").parent();
    
    parent.css({'position': 'relative'});
    parent.find('img:first').css({'position': 'relative'});
    
    // associate a 'history' stack with each parent
    parent.each(function(i, el) { $(el).data("stack", new Array()); })
    
    $("img.scrollable").on("click", function(evt) {
      if (evt.originalEvent.altKey) return true;
      
      if (!evt.originalEvent.shiftKey) {
        var last = $(this);
        last.parent().data("stack").push(last);
        if (last) last.css({'z-index': parseInt(last.css('z-index'), 10) - 1});
      } else {
        var last = $(this).parent().data("stack").pop();
        if (last) last.css({'z-index': parseInt(last.css('z-index'), 10) + 1});
      }
      
      evt.stopPropagation();
      return false;
    });
  }
);

Click a figure to slide:

list(
  randu %>% ggplot(aes(x = x, y = y)) +
    geom_point() +
    ggtitle("Slide 1"),
  randu %>% ggplot(aes(x = x, y = z)) +
    geom_point() +
    ggtitle("Slide 2")
)

Dataframes

# Use first column as index
by_col1 <- (function(.) tibble::column_to_rownames(., colnames(.)[1]))
# Use index as new column `name`
ind2col <- (function(., name) tibble::rownames_to_column(., var = name))
take_rows <- function(df, rows = NULL) {
  if (!is.null(rows)) {
    return(df[rows[rows %in% rownames(df)], , drop = FALSE])
  } else {
    return(df)
  }
}
# For reading wide tables
Sys.setenv("VROOM_CONNECTION_SIZE" = 2**19)

from_tsv <- function(filename, sep = "\t") {
  filename %>%
    vroom::vroom(del = sep, progress = FALSE) %>%
    suppressMessages() %>%
    by_col1()
}
# Writes dataframe to tab-separated file
# Precede by `ind2col("name for index")` to write the row names
to_tsv <- function(df, fn, sep = "\t") {
  vroom::vroom_write(df, out_file(fn), delim = sep)
}
Random sub-dataframe
# A small random sub-dataframe (intended for genes x samples)
shample <- function(df) {
  df[
    sample(rownames(df), min(nrow(df), 333)),
    sample(colnames(df), min(ncol(df), 33))
  ]
}

SCE

# Constructs a `SingleCellExperiment` checking consistency of sample names
make_sce <- function(counts, coldata) {
  # Check that samples in counts = genes x samples
  # are ordered as in coldata = samples x metafields
  stopifnot(assertthat::are_equal(colnames(counts), rownames(coldata)))

  SingleCellExperiment::SingleCellExperiment(
    assays = list(counts = counts), colData = coldata
  )
}
mock_sce <- function(genes, ncells) {
  sce <- scater::mockSCE(ngenes = length(genes), ncells = ncells)

  sce <- make_sce(
    sce@assays@data$counts %>% as.data.frame(row.names = genes),
    sce@colData %>% as.data.frame() %>% rename(celltype = Mutation_Status)
  )

  sce
}
drop_empty <- function(sce) {
  sce[, colSums(abs(sce@assays@data$counts)) != 0]
}
# Normalizes samplewise to sum(expr) = 1
# Consider using sce %>% scater::logNormCounts() %>% ...
norm1 <- function(sce) {
  sce@assays@data$counts %<>%
    t() %>%
    {
      (.) / (rowSums(.))
    } %>%
    t()

  sce
}
# Example
mock_sce(genes = LETTERS[1:10], ncells = 3) %>%
  take_rows(LETTERS[1:5]) %>%
  drop_empty() %>%
  norm1() %>%
  scater::logNormCounts() %>%
  scater::aggregateAcrossFeatures(ids = c("V", "C", "C", "C", "V"), average = TRUE)
## class: SingleCellExperiment 
## dim: 2 3 
## metadata(0):
## assays(1): counts
## rownames(2): C V
## rowData names(0):
## colnames(3): Cell_001 Cell_002 Cell_003
## colData names(4): celltype Cell_Cycle Treatment sizeFactor
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):

Gene meta

Taxons

taxid <- list(mm = 10090, hs = 9606)
mm hs
10090 9606

EntrezID

symbol2geneid <- function(genes) {
  data.frame(symbol = genes) %>%
    merge(
      y = org.Mm.eg.db::org.Mm.egSYMBOL2EG %>% as.data.frame(),
      all.x = TRUE,
      by = "symbol",
      sort = FALSE # !
    ) %>%
    pull(gene_id, symbol) %>%
    `[`(genes) # restore the original order
}
c("Jun", "Junb", "Junc") %>%
  data.frame(symbol = .) %>%
  mutate(gene_id = symbol2geneid(symbol)) %>%
  kable()
symbol gene_id
Jun 16476
Junb 16477
Junc NA

Dealiasing

dealias <- function(genes, species = "Mm") {
  # `alias2SymbolTable` returns of vector of the same length as the vector of aliases.
  # If an alias maps to more than one symbol, lowest Entrez ID number is returned.
  # If an alias can't be mapped, then NA is returned.

  genes %>%
    limma::alias2SymbolTable(species = species) %>%
    {
      is.na(.) %>% {
        if (any(.)) {
          warning(paste(
            "Dealiasing with `limma` genes resulted in", sum(.), "NAs, e.g. at:", 
            paste(sample(genes[.], size = min(5, sum(.))), collapse = ", ")
          ))
        }
      }
      
      # Keep old names instead of NA
      . <- dplyr::coalesce(., genes)
      
      stopifnot(all(!is.na(.)))
      stopifnot(mean((.) == genes) > 0.7)

      (.)
    }
}
dealias_sce <- function(sce) {
  rownames(sce) %<>% dealias()
  sce
}

Omnipath

PPI
omnipath.in <-
  path_to("*/*/*/omnipath_in.csv.gz") %>%
  from_tsv() %>%
  select(
    source, source_genesymbol, target, target_genesymbol,
    organism,
    is_inhibition, is_stimulation
  )

Preview:

TF-target
omnipath.tf <-
  path_to("*/*/*/omnipath_tf.csv.gz") %>%
  from_tsv() %>%
  select(
    source, source_genesymbol, target, target_genesymbol,
    organism,
    is_inhibition, is_stimulation
  )

Preview:

Ligand-receptor
omnipath.lr <-
  path_to("*/*/*/omnipath_lr.csv.gz") %>%
  from_tsv()

Preview:

Annotating genes

We use Omnipath to annotate genes with their role using the following function.

# For a vector of gene symbols returns
# a vector containing the role of each symbol
symbol2role <- function(symbol, organism = taxid$mm) {
  f <- function(df) {
    df %>% filter(organism == organism)
  }

  data.frame(
    TF = if_else(symbol %in% f(omnipath.tf)$source_genesymbol, "TF", ""),
    Tgt = if_else(symbol %in% f(omnipath.tf)$target_genesymbol, "Tgt", ""),
    Lig = if_else(symbol %in% f(omnipath.lr)$source_genesymbol, "Lig", ""),
    Rec = if_else(symbol %in% f(omnipath.lr)$target_genesymbol, "Rec", "")
  ) %>%
    apply(MARGIN = 1, FUN = function(row) {
      if (any(nzchar(row))) {
        paste(row[nzchar(row)], collapse = "/")
      } else {
        "--"
      }
    })
}

For example:

data.frame(symbol = c("Jun", "Notch1", "Xyz")) %>%
  mutate(role = symbol2role(symbol)) %>%
  kable()
symbol role
Jun TF/Tgt
Notch1 Tgt/Lig/Rec
Xyz

Annotation

The following provides a gene name (description) and a longer explanation (summary).

gene.summary <-
  taxid %>%
  lapply(
    function(i) {
      path_to(paste0("*/NCBI/gene/*/", names(which(. == i)), ".tsv.gz")) %>%
        from_tsv() %>%
        ind2col("gene_id")
    }
  )

Preview:

The following function sets up a gene id/symbol homology table.

# homology(taxid$mm, taxid$hs) %>% head
# gives a dataframe like
#   gene_id.y gene_id.x  symbol.x symbol.y
# 1         1    117586      A1bg     A1BG
# 2         2    232345       A2m      A2M
# 3         9     17961      Nat2     NAT1
# 4        10     17960      Nat1     NAT2
# 5        12     16625 Serpina3c SERPINA3
# 6        13     67758     Aadac    AADAC
homology <- function(taxid.x, taxid.y) {
  # Assume that `gene_id` is unique
  symbols <- rbind(
    org.Mm.eg.db::org.Mm.egSYMBOL2EG %>% as.data.frame(),
    org.Hs.eg.db::org.Hs.egSYMBOL2EG %>% as.data.frame()
  )


  path_to("*/MGI/*/vertebrate_homology.tsv*") %>%
    from_tsv() %>%
    select(gene_id = EntrezGene_ID, taxid = NCBI_Taxon_ID, hg = HomoloGene_ID) %>%
    filter(taxid %in% c(taxid.x, taxid.y)) %>%
    mutate(taxid = if_else(taxid == taxid.x, "x", "y")) %>%
    stats::reshape(direction = "wide", idvar = "hg", timevar = "taxid") %>%
    # "multiple rows match for taxid"
    suppressWarnings() %>%
    select(gene_id.x, gene_id.y) %>%
    merge(y = symbols %>% select(gene_id.x = gene_id, symbol.x = symbol), by = "gene_id.x") %>%
    merge(y = symbols %>% select(gene_id.y = gene_id, symbol.y = symbol), by = "gene_id.y")
}

For example:

homology(taxid$mm, taxid$hs) %>%
  head(11) %>%
  {
    .[, order(colnames(.))]
  } %>%
  mutate(seems.resonable = (tolower(symbol.x) == tolower(symbol.y))) %>%
  rbind("...") %>%
  kable()
gene_id.x gene_id.y symbol.x symbol.y seems.resonable
117586 1 A1bg A1BG TRUE
232345 2 A2m A2M TRUE
17961 9 Nat2 NAT1 FALSE
17960 10 Nat1 NAT2 FALSE
16625 12 Serpina3c SERPINA3 FALSE
67758 13 Aadac AADAC TRUE
227290 14 Aamp AAMP TRUE
11298 15 Aanat AANAT TRUE
234734 16 Aars AARS1 FALSE
268860 18 Abat ABAT TRUE
11303 19 Abca1 ABCA1 TRUE

We use that to add description/summary to mouse genes by homology.

gene.summary$mm %<>%
  merge(
    x = .,
    y = merge(
      x = gene.summary$hs %>%
        select(gene_id, summ = summary, desc = description) %>%
        mutate(desc = if_else(is.na(desc), desc, paste("By homology:", desc))) %>%
        mutate(summ = if_else(is.na(summ), summ, paste("By homology:", summ))),
      y = path_to("*/MGI/*/vertebrate_homology.tsv*") %>%
        from_tsv() %>%
        filter(NCBI_Taxon_ID %in% taxid) %>%
        select(gene_id = EntrezGene_ID, taxid = NCBI_Taxon_ID, hg = HomoloGene_ID) %>%
        stats::reshape(direction = "wide", idvar = "hg", timevar = "taxid") %>%
        select(gene_id = gene_id.9606, alt_gene_id = gene_id.10090),
      by = "gene_id",
      all = FALSE
    ) %>%
      select(gene_id = alt_gene_id, alt_desc = desc, alt_summ = summ),
    by = "gene_id",
    all.x = TRUE,
    all.y = FALSE,
    sort = FALSE
  ) %>%
  mutate(summary = if_else(!is.na(summary), summary, alt_summ)) %>%
  mutate(description = if_else(!is.na(description), description, alt_desc)) %>%
  select(-alt_desc, -alt_summ)
## Warning in reshapeWide(data, idvar = idvar, timevar = timevar, varying =
## varying, : multiple rows match for taxid=10090: first taken
## Warning in reshapeWide(data, idvar = idvar, timevar = timevar, varying =
## varying, : multiple rows match for taxid=9606: first taken

Preview:

Gene sets

Genes of interest

Define gene sets

The following defines the core set of genes of interest. More is added below from the “Daneman clusters”.

goi <-
  list(
    neuroinflammation = list(
      name = "Neuroinflammation",
      genes = unique(c(
        # "Early response" transcription factors: Fos, Fosb, Junb
        # From 2018-Vanlandewijck-He-...-Betsholtz:
        # "A molecular atlas of cell types and zonation in the brain vasculature"
        c("Fos", "Fosb", "Junb", "Egr1", "Jun"),

        # Wikipathways Neuroinflammation:
        # https://www.wikipathways.org/instance/WP4919_r111814
        # Downloaded from
        # http://www.gsea-msigdb.org/gsea/msigdb/cards/WP_NEUROINFLAMMATION.html
        c("Ascc1", "Chuk", "Fos", "Jun", "Mapk14", "Mapk8", "mt-Co1", "mt-Co2", "Mtor", "Nfkbia", "Nos2", "Rela", "Tlr4")
      ))
    ),

    # From 2018-Vanlandewijck-He-...-Betsholtz
    # Pericyte markers:
    # Pdgfrb, Cspg4, and Des
    # SMC:
    # Acta2 and Tagln
    # Fibroblast markers:
    # Pdgfra, Lum and Dcn

    pvf = list(
      name = "PVF",
      genes = c(
        # From 2021-Manberg-...-Lewandowski, p.643
        "Spp1", "Col6a1", "Col1a1", "Mmp2",
        # From 2018-Vanlandewijck-He-...-Betsholtz
        "Pdgfra", "Lum", "Dcn"
      )
    ),

    # Collagen
    col = list(
      name = "Collagen",
      genes = c(
        "Col10a1", "Col11a1", "Col11a2", "Col12a1", "Col13a1", "Col14a1",
        "Col15a1", "Col16a1", "Col17a1", "Col18a1", "Col19a1", "Col1a1",
        "Col1a2", "Col20a1", "Col22a1", "Col23a1", "Col24a1", "Col25a1",
        "Col26a1", "Col27a1", "Col28a1", "Col2a1", "Col3a1", "Col4a1",
        "Col4a2", "Col4a3", "Col4a3bp", "Col4a4", "Col4a5", "Col4a6",
        "Col5a1", "Col5a2", "Col5a3", "Col6a1", "Col6a2", "Col6a3",
        "Col6a4", "Col6a5", "Col6a6", "Col7a1", "Col8a1", "Col8a2",
        "Col9a1", "Col9a2", "Col9a3", "Colec10", "Colec11", "Colec12",
        "Colgalt1", "Colgalt2", "Colq"
      )
      # Obtained using
      # sce_dm %>% rownames() %>% { (.)[stringr::str_detect(., "^Col")] } %>% sort()
    ),

    # Mitochondrial
    mt = list(
      name = "Mitochondrial",
      genes = c(
        "mt-Atp6", "mt-Atp8", "mt-Co1", "mt-Co2", "mt-Co3", "mt-Cytb",
        "mt-Nd1", "mt-Nd2", "mt-Nd3", "mt-Nd4", "mt-Nd4l", "mt-Nd5", "mt-Nd6"
      )
    ),
    # Obtained using (e.g.)
    # sce_dm %>% rownames() %>% { (.)[stringr::str_detect(., "^mt-")] } %>% sort()

    air = list(
      name = "Aerobic respiration",
      genes = c(
        # GO:1903715 (regulation of aerobic respiration)
        # http://www.informatics.jax.org/vocab/gene_ontology/GO:1903715
        c("Actn3", "Akt1", "Bnip3", "Cbfa2t3", "Hif1a", "Ide", "Nop53", "Shmt2", "Trpv4", "Vcp"),

        # GO:0009060 (aerobic respiration) minus GO:1903715 and minus 'mt-Co3'
        # http://www.informatics.jax.org/vocab/gene_ontology/GO:0009060
        c("Aco1", "Aco2", "Adsl", "Afg1l", "Atp5f1d", "Bloc1s1", "Cat", "Cox10", "Cox4i1", "Cox4i2", "Cox5a", "Cox5b", "Cox6a1", "Cox6a2", "Cox7c", "Cs", "Csl", "Cycs", "Cyct", "Dhtkd1", "Dlat", "Dlst", "Fh", "Fxn", "Idh1", "Idh2", "Idh3a", "Idh3b", "Idh3g", "Ireb2", "Mdh1", "Mdh1b", "Mdh2", "Mtco1", "Mtfr1", "Mtfr1l", "Mtfr2", "Mtnd1", "Mtnd4", "Mup1", "Mup11", "Mup2", "Mup3", "Mup4", "Mup5", "Mup6", "Ndufs4", "Ndufs7", "Ndufs8", "Ogdh", "Ogdhl", "Oxa1l", "Pank2", "Pdha1", "Pdha2", "Pdhb", "Proca1", "Q8BPC6", "Sdha", "Sdhaf2", "Sdhb", "Sdhc", "Sdhd", "Sirt3", "Sucla2", "Suclg1", "Suclg2", "Ucn", "Uqcr10", "Uqcrb", "Uqcrh")
      )
    ),
    
    fib_mig = list(
      name = "Reg. fib. mig.",
      genes = c(
        # GO:0010762 (regulation of fibroblast migration)
        # http://www.informatics.jax.org/vocab/gene_ontology/GO:0010762
        c("Acta2", "Actr3", "Adipor2", "Ager", "Akap12", "Akt1", "Appl1", "Appl2", "Arhgap4", "Bag4", "Braf", "Cln3", "Coro1c", "Cygb", "Ddr2", "Dmtn", "Fer", "Fgf2", "Fgfr1", "Gna12", "Has1", "Hyal2", "Itgb1", "Itgb1bp1", "Itgb3", "MACIR", "Mmp1a", "Mta2", "Pak1", "Pak3", "Prkce", "Prr5l", "Ptk2", "Rac1", "Rcc2", "Rffl", "Sdc4", "Slc8a1", "Tgfb1", "Thbs1", "Tsc2", "Uts2", "Wdpcp")
      )
    ),
    
    glycolysis = list(
      name = "Anaerobic glycolysis",
      genes = c(
        # GO:0006096 (glycolytic process / anaerobic glycolysis)
        # http://www.informatics.jax.org/vocab/gene_ontology/GO:0006096
        c("Actn3", "Adpgk", "Aldoa", "Aldoart1", "Aldoart2", "Aldob", "Aldoc", "App", "Bpgm", "Cbfa2t3", "Ddit4", "Dhtkd1", "Eif6", "Eno1", "Eno2", "Eno3", "Eno4", "Entpd5", "Ep300", "Esrrb", "Fbp1", "Foxk1", "Foxk2", "Gale", "Galk1", "Galt", "Gapdh", "Gapdhs", "Gck", "Git1", "Gm10358", "Gm11214", "Gm12117", "Gm15294", "Gm3839", "Gpd1", "Gpi", "Hdac4", "Hif1a", "Hk1", "Hk2", "Hk3", "Hkdc1", "Htr2a", "Ier3", "Ifng", "Igf1", "Il3", "Ins2", "Insr", "Jmjd8", "Khk", "Mif", "Mlxipl", "Mpi", "Mtch2", "Myc", "Myog", "Ncor1", "Nupr1", "Ogdh", "Ogt", "P2rx7", "Pfkfb2", "Pfkl", "Pfkm", "Pfkp", "Pgam1", "Pgam2", "Pgk1", "Pgk2", "Pklr", "Pkm", "Ppara", "Ppargc1a", "Prkaa1", "Prkaa2", "Prkag2", "Prkag3", "Prxl2c", "Psen1", "Sirt6", "Slc2a6", "Slc4a1", "Slc4a4", "Stat3", "Tigar", "Tkfc", "Tpi1", "Trex1", "Zbtb20", "Zbtb7a")
      )
    ),

    # https://reactome.org/PathwayBrowser/#/R-MMU-877300&SEL=R-MMU-873829&DTAB=MT
    # Motivated by https://www.nature.com/articles/s41593-020-00770-9
    ifng_signaling = list(
      name = "Interferon gamma signaling",
      genes = c("Ifngr1", "Ifngr2", "Jak1", "Jak2", "Ifng")
    ),
    
    # Genes that are upregulated in EAE simultaneously in CB and Dm
    # (collected 2021-07-08)
    eae_common_up = list(
      name = "Up in EAE in CB & Dm",
      genes = c("Wif1", "Flrt2", "Scgb1a1", "Tcim", "Pcdh19", "Ifi47", "Aqp1", "Npy1r", "Serpina3g", "Ak1", "Kng1", "Plac8", "Rai14", "Nup50", "Cd74", "Adam12", "Tnfaip2", "Cd44", "Zfp608", "Cldn11", "Chst2")
    ),
    
    # 2020-Muhl-...-Betsholtz (Single-cell analysis uncovers fibroblast heterogeneity and criteria for fibroblast and mural cell identification and discrimination)
    FB45 = list(
      name = "FB45 markers from Muhl et al., 2020",
      genes = c("Nav1", "Dpt", "Hsd11b1", "Cd34", "Celf2", "Entpd2", "Col5a1", "Gsn", "Slc43a3", "S100a16", "S100a10", "Olfml3", "Lpar1", "Htra3", "Ugdh", "Pdgfra", "ENSMUSG00000107314", "Medag", "Col1a2", "Fbln2", "Mfap5", "Mgst1", "Lsp1", "Mmp2", "Dpep1", "Loxl1", "Pcolce2", "Bicc1", "Dcn", "Lum", "Gfpt2", "Adamts2", "Mfap4", "Serpinf1", "Col1a1", "Abca8a", "Rnase4", "Scara5", "Ly6a", "Fbln1", "Igfbp6", "Heg1", "Ccdc80", "Pi16", "C3")
    )
  )
Collector of genes from the genesets of interest
# Use a function because the list may change
get_goi_genes <- function() {
  goi %>%
    lapply(as.data.frame) %>%
    dual(base::rbind) %>%
    pull(genes) %>%
    unique()
}
get_goi_genes()[1:7] %>%
  c("...") %>%
  t() %>%
  kable()
Fos Fosb Junb Egr1 Jun Ascc1 Chuk
Inferring gene set from symbol
# Return a vector `goi_set` indicating the gene set for each `symbol`
symbol2goiset <- function(genes) {
  df <- data.frame(symbol = genes, goi_set = "Other")

  for (goi in goi) {
    df %<>% mutate(goi_set = if_else(symbol %in% goi$genes, goi$name, goi_set))
  }

  (df %>% pull(goi_set, symbol))[genes]
}
Function to show gene info
show_gene_info_table <- function(df) {
  df$symbols # need this column

  df %>%
    mutate(role = symbol2role(symbol)) %>%
    mutate(goi_set = symbol2goiset(symbol)) %>%
    mutate(gene_id = symbol2geneid(symbol)) %>%
    merge(gene.summary$mm, all.x = TRUE, by = "gene_id", sort = FALSE) %>%
    mutate(description = if_else(is.na(summary), description, paste0("<details>", "<summary>", description, "</summary>", summary, "</details>"))) %>%
    mutate(description = paste0("<span style='font-size:70%'>", description, "</span>")) %>%
    select(-summary) %>%
    mutate(symbol = as.link(symbol)) %>%
    DT::datatable(rownames = NULL, escape = FALSE, width = "100%")
}
get_goi_genes() %>%
  sample(size = 3) %>%
  data.frame(symbol = .) %>%
  rbind("...") %>%
  show_gene_info_table()
Plot from PCA
# TODO: normalize with edgeR?
# Plots the PCA, showing the expression of genes
show_goi_pca <- function(sce, genes, group.by = "celltype", colors = ramp(as.character(sce@colData[[group.by]]))) {
  cbind(
    # Reduced dim coordinates
    sce@int_colData$reducedDims$PCA %>%
      as.data.frame(row.names = colnames(sce)) %>%
      select(x = PC1, y = PC2),
    # Metadata
    sce@colData %>%
      as.data.frame(row.names = colnames(sce)) %>%
      select(celltype = all_of(group.by)),
    # Expression data for genes-of-interest
    sce@assays@data$counts %>%
      log1p() %>%
      `[`(genes[genes %in% rownames(.)], ) %>%
      t()
  ) %>%
    # Make long table
    reshape2::melt(
      # Keep as separate columns:
      id.vars = c("x", "y", "celltype"),
      # These columns will be stacked, in a column with this name:
      measure.vars = genes, variable.name = "symbol"
    ) %>%
    # Plot
    ggplot(aes(x = x, y = y, color = celltype)) +
    geom_point(aes(size = value), alpha = 0.4) +
    scale_color_manual(values = colors, breaks = gglast(colour)) +
    # theme_void() +
    theme(
      axis.title = element_blank(),
      axis.text = element_blank(),
    ) +
    labs(color = group.by, size = "Expression") +
    facet_wrap(vars(symbol))
}
Example
get_goi_genes() %>%
  mock_sce(genes = ., ncells = 77) %>%
  scater::logNormCounts() %>%
  scater::runPCA() %>%
  show_goi_pca(goi$pvf$genes, group.by = "Cell_Cycle", colors = list(`G0` = "Red", `G1` = "Blue", `G2M` = "Orange", `S` = "DarkGreen", `Dead` = "Black"))
## Warning in (function (A, nv = 5, nu = nv, maxit = 1000, work = nv + 7, reorth =
## TRUE, : You're computing too large a percentage of total singular values, use a
## standard svd instead.

Agg. by sample group

# Aggregates genes by `group.by` for each geneset from `goi`
goi_agg_by_group <- function(sce, group.by = "celltype") {
  goi %>%
    lapply(function(goi) {
      (dual(rbind))(as.list(group.by) %>% lapply(function(group.by) {
        if (any(goi$genes %in% rownames(sce))) {
          cbind(
            # Metadata
            sce@colData %>%
              as.data.frame(row.names = colnames(sce)) %>%
              select(subgroup = all_of(group.by)),
            # Expression data for genes-of-interest
            sce %>%
              scater::logNormCounts() %>%
              SingleCellExperiment::logcounts() %>%
              take_rows(goi$genes) %>%
              t()
          ) %>%
            # Aggregate by sample
            group_by(subgroup) %>%
            summarise(across(everything(), mean)) %>%
            # Make long table
            reshape2::melt(
              # Keep as separate columns:
              id.vars = c("subgroup"),
              # Other columns will be stacked, in a column with this name:
              variable.name = "symbol"
              # A column `value` contains their values
            ) %>%
            mutate(group = group.by) %>%
            mutate(goi_set = goi$name)
        }
      }))
    }) %>%
    # Relabel the elements of the list
    dual(rbind) %>%
    split(.$goi_set)

  # cf.
  # https://stackoverflow.com/questions/33775239/emulate-split-with-dplyr-group-by-return-a-list-of-data-frames
}
Example
get_goi_genes() %>%
  mock_sce(genes = ., ncells = 77) %>%
  goi_agg_by_group(group.by = "Treatment") %>%
  lapply(head)
## $`Aerobic respiration`
##       subgroup symbol    value     group             goi_set
## air.1   treat1  Actn3 9.260463 Treatment Aerobic respiration
## air.2   treat2  Actn3 9.165972 Treatment Aerobic respiration
## air.3   treat1   Akt1 3.213792 Treatment Aerobic respiration
## air.4   treat2   Akt1 4.229850 Treatment Aerobic respiration
## air.5   treat1  Bnip3 4.846157 Treatment Aerobic respiration
## air.6   treat2  Bnip3 4.957914 Treatment Aerobic respiration
## 
## $`Anaerobic glycolysis`
##              subgroup symbol    value     group              goi_set
## glycolysis.1   treat1  Actn3 9.260463 Treatment Anaerobic glycolysis
## glycolysis.2   treat2  Actn3 9.165972 Treatment Anaerobic glycolysis
## glycolysis.3   treat1  Adpgk 4.983677 Treatment Anaerobic glycolysis
## glycolysis.4   treat2  Adpgk 4.219723 Treatment Anaerobic glycolysis
## glycolysis.5   treat1  Aldoa 7.065130 Treatment Anaerobic glycolysis
## glycolysis.6   treat2  Aldoa 7.114694 Treatment Anaerobic glycolysis
## 
## $Collagen
##       subgroup  symbol    value     group  goi_set
## col.1   treat1 Col10a1 2.944400 Treatment Collagen
## col.2   treat2 Col10a1 3.073348 Treatment Collagen
## col.3   treat1 Col11a1 3.344087 Treatment Collagen
## col.4   treat2 Col11a1 3.027965 Treatment Collagen
## col.5   treat1 Col11a2 5.257603 Treatment Collagen
## col.6   treat2 Col11a2 5.521592 Treatment Collagen
## 
## $`FB45 markers from Muhl et al., 2020`
##        subgroup  symbol     value     group                             goi_set
## FB45.1   treat1    Nav1 1.2959524 Treatment FB45 markers from Muhl et al., 2020
## FB45.2   treat2    Nav1 0.6757902 Treatment FB45 markers from Muhl et al., 2020
## FB45.3   treat1     Dpt 8.2847228 Treatment FB45 markers from Muhl et al., 2020
## FB45.4   treat2     Dpt 7.8005161 Treatment FB45 markers from Muhl et al., 2020
## FB45.5   treat1 Hsd11b1 8.5902541 Treatment FB45 markers from Muhl et al., 2020
## FB45.6   treat2 Hsd11b1 9.0564841 Treatment FB45 markers from Muhl et al., 2020
## 
## $`Interferon gamma signaling`
##                  subgroup symbol     value     group                    goi_set
## ifng_signaling.1   treat1 Ifngr1 0.7895064 Treatment Interferon gamma signaling
## ifng_signaling.2   treat2 Ifngr1 1.5765212 Treatment Interferon gamma signaling
## ifng_signaling.3   treat1 Ifngr2 2.5450688 Treatment Interferon gamma signaling
## ifng_signaling.4   treat2 Ifngr2 1.9747724 Treatment Interferon gamma signaling
## ifng_signaling.5   treat1   Jak1 8.3130972 Treatment Interferon gamma signaling
## ifng_signaling.6   treat2   Jak1 8.2689683 Treatment Interferon gamma signaling
## 
## $Mitochondrial
##      subgroup  symbol    value     group       goi_set
## mt.1   treat1 mt-Atp6 8.761058 Treatment Mitochondrial
## mt.2   treat2 mt-Atp6 8.711053 Treatment Mitochondrial
## mt.3   treat1 mt-Atp8 1.139951 Treatment Mitochondrial
## mt.4   treat2 mt-Atp8 1.423559 Treatment Mitochondrial
## mt.5   treat1  mt-Co1 5.973012 Treatment Mitochondrial
## mt.6   treat2  mt-Co1 6.475837 Treatment Mitochondrial
## 
## $Neuroinflammation
##                     subgroup symbol    value     group           goi_set
## neuroinflammation.1   treat1    Fos 8.279289 Treatment Neuroinflammation
## neuroinflammation.2   treat2    Fos 8.531285 Treatment Neuroinflammation
## neuroinflammation.3   treat1   Fosb 1.178257 Treatment Neuroinflammation
## neuroinflammation.4   treat2   Fosb 1.100598 Treatment Neuroinflammation
## neuroinflammation.5   treat1   Junb 2.465547 Treatment Neuroinflammation
## neuroinflammation.6   treat2   Junb 2.357508 Treatment Neuroinflammation
## 
## $PVF
##       subgroup symbol     value     group goi_set
## pvf.1   treat1   Spp1 5.1032277 Treatment     PVF
## pvf.2   treat2   Spp1 5.1711888 Treatment     PVF
## pvf.3   treat1 Col6a1 1.3195101 Treatment     PVF
## pvf.4   treat2 Col6a1 0.8752999 Treatment     PVF
## pvf.5   treat1 Col1a1 1.4631855 Treatment     PVF
## pvf.6   treat2 Col1a1 2.1215638 Treatment     PVF
## 
## $`Reg. fib. mig.`
##           subgroup  symbol     value     group        goi_set
## fib_mig.1   treat1   Acta2 1.4120275 Treatment Reg. fib. mig.
## fib_mig.2   treat2   Acta2 0.8888933 Treatment Reg. fib. mig.
## fib_mig.3   treat1   Actr3 5.2965038 Treatment Reg. fib. mig.
## fib_mig.4   treat2   Actr3 5.6853130 Treatment Reg. fib. mig.
## fib_mig.5   treat1 Adipor2 8.1594740 Treatment Reg. fib. mig.
## fib_mig.6   treat2 Adipor2 8.7681594 Treatment Reg. fib. mig.
## 
## $`Up in EAE in CB & Dm`
##                 subgroup  symbol    value     group              goi_set
## eae_common_up.1   treat1    Wif1 5.631980 Treatment Up in EAE in CB & Dm
## eae_common_up.2   treat2    Wif1 5.080916 Treatment Up in EAE in CB & Dm
## eae_common_up.3   treat1   Flrt2 9.561554 Treatment Up in EAE in CB & Dm
## eae_common_up.4   treat2   Flrt2 9.700943 Treatment Up in EAE in CB & Dm
## eae_common_up.5   treat1 Scgb1a1 3.487086 Treatment Up in EAE in CB & Dm
## eae_common_up.6   treat2 Scgb1a1 3.424446 Treatment Up in EAE in CB & Dm
Aggregate by gene set
# For each cell, aggregate each gene set
# Returns a sample x geneset table (with additional metadata columns)
goi_agg_by_genes <- function(sce) {
  cbind(
    # Sample metadata
    sce@colData %>%
      as.data.frame() %>%
      select(any_of(c("cluster", "celltype", "group", "subgroup", "Group"))),

    # Mean expression for genes-of-interest
    lapply(goi, function(goi) {
      list() %>% within({
        assign(
          goi$name,
          sce@assays@data$counts %>%
            log1p() %>%
            take_rows(goi$genes) %>%
            colMeans()
        )
      })
    }),

    # Library size
    data.frame(lib.size = sce@assays@data$counts %>% colSums()),

    # Reduced dim coordinates
    sce %>%
      scater::logNormCounts() %>%
      scater::runPCA() %>%
      SingleCellExperiment::reducedDim("PCA") %>%
      as.data.frame() %>%
      select(x = PC1, y = PC2, z = PC3)
  )
}
Example
get_goi_genes() %>%
  mock_sce(genes = ., ncells = 77) %>%
  goi_agg_by_genes() %>%
  head() %>%
  DT::datatable(width = "100%", options = list(scrollX = TRUE))
## Warning in (function (A, nv = 5, nu = nv, maxit = 1000, work = nv + 7, reorth =
## TRUE, : You're computing too large a percentage of total singular values, use a
## standard svd instead.

Kidney fibroblasts

This is a set of differential genes in kidney fibroblast activation from Deng et al., 2021.

Load
fb_genes_si7 <- path_to("*/Deng-Wei-2021/*/SI7.*.gz") %>% from_tsv()
# fb_genes_si8 <- path_to("*/Deng-Wei-2021/*/SI8.*.gz") %>% from_tsv()
show_tbl <- function(tbl) {
  tbl %<>%
    ind2col("symbol") %>%
    mutate(symbol = as.link(symbol)) %>%
    select(symbol, logFC)

  tbl %>%
    DT::datatable(escape = FALSE) %>%
    DT::formatSignif(colnames(tbl %>% select(-symbol)), digits = 3)
}
SI7

The DE table from SI7 extracted from GSE121190.

fb_genes_si7 %>%
  filter(adj.P.Val < 1e-2) %>%
  show_tbl()

Daneman, 2021

The list of genes is from Dorrier-Aran-…-Daneman, 2021, Extended Data Fig. 4.

Add clusters to genes of interest
if (!("c0" %in% names(goi))) {
  goi %<>%
    c(
      list(
        `0` = c("Col1a2", "Bgn", "Col3a1", "Col8a1", "Igf1", "Ifitm1", "Col1a1", "Cfh", "Sparc", "B2m"),
        `1` = c("Apod", "Trf", "Dcn", "Gsn", "Gpc3", "Tgfbi", "Ccl11", "Smoc2", "Lum", "Cst3"),
        `2` = c("Lgals1", "Timp1", "Rpl41", "Rps18", "Rps12", "Col4a1", "Cxcl10"),
        `3` = c("Junb", "Fosb", "Fos", "Ptn", "Igfbp2", "Klf4", "Clec3b", "Apoe", "Rbp4", "Vtn"),
        `4` = c("Rbp1", "Fth1", "Aldh1a2", "Fn1", "Igfbp3", "Slc7a11", "Ptgds", "Timp3", "Igfbp5"),
        `5` = c("Ptch1", "Igfbp6", "Tmsb4x", "S100a6", "Igfbp7", "Mgp", "Saa1"),
        `6` = c("Itih5", "Spp1", "Klf2", "Ubb", "Jund", "Mt1"),
        `7` = c("Rgs5", "Acta2", "Myl9", "Gm13889", "Tagln", "Tpm2", "Actb")
      ) %>%
        stack() %>%
        mutate(cluster = paste0("c", ind), name = paste0("Dm #", ind)) %>%
        split(.$cluster) %>%
        lapply(function(c) list(name = unique(c$name), genes = c$values))
    )
}
Visualizing in heatmap
heatmap <- function(sce, colors = NULL,
                    anno_col = NULL, anno_row = NULL,
                    ...) {
  # `colors` should be like list(`negative` = "Blue", `positive` = "Red", ...)
  colors %<>% c(NULL) %>% unlist()

  if (!is.null(anno_row)) {
    stopifnot(assertthat::are_equal(rownames(sce), rownames(anno_row)))
  }

  if (!is.null(anno_col)) {
    stopifnot(assertthat::are_equal(colnames(sce), rownames(anno_col)))
  }

  anno_col %<>%
    list(
      sce@colData %>%
        as.data.frame() %>%
        select(any_of(c("celltype", "subgroup", "Group", "cluster", "Treatment")))
    ) %>%
    dual(cbind)

  sce %>%
    SingleCellExperiment::counts() %>%
    log1p() %>%
    {
      # (.) - rowMeans(.)
      # (.) / rowSums(.)
    } %>%
    pheatmap::pheatmap(
      show_colnames = FALSE,
      treeheight_row = 0,
      treeheight_col = 0,
      cluster_rows = FALSE,
      fontsize = 6,
      color = grDevices::colorRampPalette(c("black", "yellow"))(10),
      annotation_col = anno_col,
      annotation_row = anno_row,
      annotation_colors = c(
        # TODO: use blue-red for LFC
        anno_row %>%
          as.list() %>%
          lapply(ramp),
        anno_col %>%
          as.list() %>%
          lapply(function(x) {
            if (all(x %in% names(colors))) {
              colors[unique(as.character(x))] %>% unlist()
            } else {
              ramp(x)
            }
          })
      ),
      ...
    )
}
show_dm_heatmap <- function(sce, anno_row = NULL, ...) {
  dm_genes <-
    goi[grep("c[0-9]", goi %>% names())] %>%
    lapply(as.data.frame) %>%
    dual(rbind) %>%
    pull(name, genes)

  if (!is.null(anno_row)) {
    stopifnot(assertthat::are_equal(rownames(sce), rownames(anno_row)))
  }

  anno_row %<>%
    take_rows(names(dm_genes))

  sce %<>%
    take_rows(names(dm_genes))

  anno_row %<>%
    list(data.frame(dm.cluster = dm_genes[rownames(sce)])) %>%
    dual(cbind)

  heatmap(sce, anno_row = anno_row, ...)
}
unlist(goi[grep("c[0-9]", goi %>% names())]) %>%
  mock_sce(ncells = 77) %>%
  {
    (.)[, order((.)$celltype)]
  } %>%
  show_dm_heatmap(
    colors = colors,
    cluster_cols = FALSE,
    anno_col = data.frame(x = ((1:ncol(.)) %% 2), row.names = colnames(.)),
    anno_row = data.frame(y = ((1:nrow(.)) %% 2), row.names = rownames(.))
  )

All GoI

The following table shows all the “genes of interest”.

get_goi_genes() %>%
  data.frame(symbol = .) %>%
  show_gene_info_table()

Analysis tools

Dummy dataset

Small dataset to run through quickly.

sce_dummy <- readRDS(path_to("*/*/sce_dummy.rds"))
# Made with:
# load_dataset$dm(size = 77) %>% keep_only_goi() %>% saveRDS(file = "sce_dummy.rds")
## class: SingleCellExperiment 
## dim: 267 74 
## metadata(0):
## assays(1): counts
## rownames(267): Fos Fosb ... Zbtb20 Zbtb7a
## rowData names(0):
## colnames(74): healthy2_AATCCAGCATAGTAAG EAE1_AAAGATGTCGGAAATA ...
##   EAE1_CTGTTTAAGAGTCTGG EAE1_TTAGTTCTCTCTGAGA
## colData names(4): group subgroup cluster celltype
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
data.frame(
  x = sce_dummy@assays@data$counts %>% colSums(),
  subgroup = sce_dummy$subgroup
) %>%
  ggplot(aes(x = x, color = subgroup)) +
  scale_color_manual(values = colors[as.character(gglast(colour))]) +
  geom_density()

Scater

Computation of red. dim.
scater_attach <- function(sce) {
  scater::addPerCellQC(
    x = sce,
    subsets = list(Mito = grep("mt-", rownames(sce)))
  ) %>%
    scater::logNormCounts() %>%
    scater::runPCA(name = "PCA", ncomponents = 20) %>%
    scater::runTSNE(perplexity = 10, dimred = "PCA") %>%
    scater::runUMAP()
}
sce_dummy %<>% scater_attach()
## Read the 74 x 20 data matrix successfully!
## OpenMP is working. 1 threads.
## Using no_dims = 2, perplexity = 10.000000, and theta = 0.500000
## Computing input similarities...
## Building tree...
## Done in 0.00 seconds (sparsity = 0.595690)!
## Learning embedding...
## Iteration 50: error is 69.228473 (50 iterations in 0.01 seconds)
## Iteration 100: error is 70.271197 (50 iterations in 0.01 seconds)
## Iteration 150: error is 61.997081 (50 iterations in 0.01 seconds)
## Iteration 200: error is 67.325230 (50 iterations in 0.00 seconds)
## Iteration 250: error is 63.980974 (50 iterations in 0.00 seconds)
## Iteration 300: error is 2.246460 (50 iterations in 0.00 seconds)
## Iteration 350: error is 1.672271 (50 iterations in 0.00 seconds)
## Iteration 400: error is 1.376569 (50 iterations in 0.00 seconds)
## Iteration 450: error is 1.158846 (50 iterations in 0.00 seconds)
## Iteration 500: error is 0.879138 (50 iterations in 0.00 seconds)
## Iteration 550: error is 0.741583 (50 iterations in 0.00 seconds)
## Iteration 600: error is 0.711154 (50 iterations in 0.00 seconds)
## Iteration 650: error is 0.690883 (50 iterations in 0.00 seconds)
## Iteration 700: error is 0.692199 (50 iterations in 0.00 seconds)
## Iteration 750: error is 0.692134 (50 iterations in 0.00 seconds)
## Iteration 800: error is 0.675255 (50 iterations in 0.00 seconds)
## Iteration 850: error is 0.674572 (50 iterations in 0.00 seconds)
## Iteration 900: error is 0.677335 (50 iterations in 0.00 seconds)
## Iteration 950: error is 0.674728 (50 iterations in 0.00 seconds)
## Iteration 1000: error is 0.671796 (50 iterations in 0.00 seconds)
## Fitting performed in 0.08 seconds.
## 11:59:58 UMAP embedding parameters a = 1.896 b = 0.8006
## 11:59:58 Read 74 rows and found 267 numeric columns
## 11:59:58 Reducing X column dimension to 50 via PCA
## 11:59:58 PCA: 50 components explained 95.41% variance
## 11:59:58 Using FNN for neighbor search, n_neighbors = 15
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/FNN/libs/FNN.so") ...
## 11:59:59 Commencing smooth kNN distance calibration using 4 threads
## 12:00:00 Initializing from normalized Laplacian + noise
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/RSpectra/libs/RSpectra.so") ...
## 12:00:00 Commencing optimization for 500 epochs, with 1758 positive edges
## 12:00:01 Optimization finished
sce_dummy %>%
  scater::makePerCellDF() %>%
  head(3) %>%
  DT::datatable(width = "100%")
Plotting (2d)
scater_show <- function(sce, color.by = "celltype", colors = NULL) {
  items <- sce@colData[[color.by]]

  colors <-
    if (all(items %in% names(colors))) {
      colors[unique(as.character(items))]
    } else {
      ramp(items, palette = "Set 2")
    }

  sce %>%
    {
      function(which_dimred) {
        suppressMessages(
          scater::plotReducedDim(
            dimred = which_dimred,
            object = (.),
            colour_by = color.by
          ) +
            scale_color_manual(values = colors) + # no need for `breaks`
            labs(color = color.by) +
            guides(color = guide_legend(override.aes = list(size = 2, alpha = 1))) +
            theme(
              axis.text = element_blank(),
              axis.ticks = element_blank()
            )
        )
      }
    } %>%
    {
      gridExtra::arrangeGrob((.)("PCA"), (.)("TSNE"), (.)("UMAP"), nrow = 3)
    }
}
sce_dummy %>%
  scater_show(color.by = "subgroup", colors = colors) %>%
  gridExtra::grid.arrange()

Plotting (3d)

Prototype, not for direct use.

pca3d_template <- function(sce, color.by = "celltype", colors = ramp(sce@colData[[color.by]])) {
  data.frame(
    color_group = sce@colData[[color.by]],
    lib.size = sce@assays@data$counts %>% colSums(),
    SingleCellExperiment::reducedDim(sce, "PCA") %>%
      as.data.frame() %>%
      select(PC1, PC2, PC3)
  ) %>%
    group_by(color_group) %>%
    sample(size = 30, replace = TRUE) %>%
    ungroup() %>%
    # print() %>%

    plotly::plot_ly() %>%
    plotly::add_markers(
      x = ~PC1, y = ~PC2, z = ~PC3,
      marker = list(size = 2, opacity = 1),
      color = ~color_group,
      colors = colors
    ) %>%
    plotly::layout(
      scene = list(
        xaxis = list(title = "PC1", showticklabels = FALSE),
        yaxis = list(title = "PC2", showticklabels = FALSE),
        zaxis = list(title = "PC3", showticklabels = FALSE)
      )
    )
}
sce_dummy %>% pca3d_template(color.by = "subgroup", colors = colors)

Cell cycle

Cell cycle genes
default_cyclic_genes <-
  # Convert the "cyclic genes" of `Revelio` from Hs to Mm
  Revelio::revelioTestData_cyclicGenes %>%
  lapply(function(genes) {
    merge(
      x = data.frame(symbol.x = genes),
      y = homology(taxid$hs, taxid$mm),
      by = "symbol.x",
      all.x = TRUE
    ) %>%
      pull(symbol.y) %>%
      sort(na.last = TRUE)
  })
default_cyclic_genes %>%
  as.data.frame() %>%
  head() %>%
  rbind("...") %>%
  kable()
G1.S S G2 G2.M M.G1
Abca7 a 1700001L19Rik Adh4 1700102P08Rik
Acd Abcc2 1700066M21Rik Ahi1 2700049A03Rik
Acyp1 Abcc5 Alkbh1 Akirin2 Afap1
Adamts1 Abhd10 Anln Ankrd40 Agfg1
Adck2 Adam22 Ap3d1 Anln Agpat3
Adcy6 Arhgap42 Arhgap19 Arhgap19 Akap13
Computation of cell cycle phase

The DC1-DC2 plane is supposed to show a circle reflecting the cell cycle (cf. Fig 1 in Schwabe et al., 2020).

cell_cycle <- function(sce, batch.by = NA, cyclic_genes = default_cyclic_genes) {
  list(
    counts = sce@assays@data$counts,
    orig.id = colnames(sce),
    batch = if (is.na(batch.by)) "cell" else paste0(sce@colData[[batch.by]], "_")
  ) %>%
    within({
      colnames(counts) <- paste0(batch, 1:ncol(counts))
      orig.id <- data.frame(old = orig.id, new = colnames(counts)) %>% pull(old, new)
    }) %>%
    with({
      counts %>%
        Revelio::createRevelioObject(cyclicGenes = cyclic_genes, lowernGeneCutoff = 0) %>%
        Revelio::getCellCyclePhaseAssignInformation() %>%
        Revelio::getPCAData() %>%
        Revelio::getOptimalRotation() %>%
        list(cyclic = .) %>%
        with({
          assertthat::assert_that(all(
            rownames(cyclic@cellInfo) == colnames(cyclic@transformedData$pca$data)
          ))

          cbind(
            cyclic@transformedData$pca$data %>% `[`(1:min(5, nrow(.)), ) %>% t(),
            cyclic@transformedData$dc$data %>% `[`(1:min(5, nrow(.)), ) %>% t(),
            cyclic@cellInfo
          )
        }) %>%
        data.frame(row.names = orig.id[rownames(.)])
    }) %>%
    cbind(as.data.frame(sce@colData)[rownames(.), , drop = FALSE])
}
2d plotting of cell cycle “trajectory”
cell_cycle_plot2d <- function(cco, colors = ramp(cco$ccPhase)) {
  cco %>%
    ggplot(aes(x = DC1, y = DC2, color = ccPhase)) +
    geom_point(size = 3, stroke = 1) +
    scale_color_manual(values = colors, breaks = gglast(colour))
}
Test on Revelio data
cco_revelio <-
  cell_cycle(
    Revelio::revelioTestData_rawDataMatrix %>%
      make_sce(
        counts = .,
        coldata = data.frame(
          x = colnames(.) %>% strsplit("_") %>% dual(rbind),
          row.names = colnames(.)
        ) %>% select(batch = x.1)
      ),
    batch.by = "batch",
    cyclic_genes = Revelio::revelioTestData_cyclicGenes
  )
## 2021-07-08 20:11:07: calculating optimal rotation: 2021-07-08 20:11:07: calculating PCA: 2021-07-08 20:11:07: assigning cell cycle phases: 2021-07-08 20:11:07: reading data: 5.73secs
## 31.71secs
## 58.25secs
## 1.51mins
cco_revelio %>%
  ggplot(aes(x = DC1, y = DC2, color = ccPhase, shape = batch)) +
  geom_point(size = 3, stroke = 1) +
  scale_color_manual(values = colors, breaks = gglast(colour))

Test on mock data
cco_mock <-
  unlist(as.list(default_cyclic_genes)) %>%
  {
    (.)[!is.na(.)]
  } %>%
  mock_sce(ncells = 33) %>%
  cell_cycle()
## 2021-07-08 20:12:39: calculating optimal rotation: 2021-07-08 20:12:39: calculating PCA: 2021-07-08 20:12:39: assigning cell cycle phases: 2021-07-08 20:12:39: reading data: 0.01secs
## 0.54secs
## 0.62secs
## 1.06mins
cco_mock %>%
  ggplot(aes(x = DC1, y = DC2, color = celltype, fill = ccPhase)) +
  geom_point(shape = 21, size = 5) +
  ggplot2::scale_color_manual(values = colors, breaks = gglast(colour)) +
  ggplot2::scale_fill_manual(values = colors, breaks = gglast(fill))

3d plotting of cell cycle “trajectory” with plotly
cell_cycle_plot3d <- function(cco, colors = ramp(cco$ccPhase)) {
  cco %>%
    with({
      plotly::plot_ly() %>%
        plotly::add_markers(
          x = DC1, y = DC2, z = DC3,
          size = 4,
          color = ccPhase,
          colors = colors
        )
    })
}
Test 3d plot
cco_mock %>% cell_cycle_plot3d()
Alternative 3d plotting with rgl
cco_mock %>%
  with({
    rgl::plot3d(DC1, DC2, DC3, size = 3, type = "s", col = ramp(ccPhase)[ccPhase])
    rgl::rglwidget()
  })

DE

Computation of DE genes
deseq <- function(sce, design) {
  sce %>%
    DESeq2::DESeqDataSet(design = design) %>%
    DESeq2::estimateSizeFactors(type = "poscounts") %>%
    DESeq2::DESeq(quiet = TRUE, fitType = "local") %>%
    DESeq2::results() %>%
    as.data.frame() %>%
    select(base = baseMean, lfc = log2FoldChange, p = pvalue) %>%
    mutate(base = log1p(base)) %>%
    tidyr::drop_na() %>%
    mutate(p = p.adjust(p, "BH"), p.unadj = p) %>%
    ind2col("symbol") %>%
    list(table = ., design = design)
}
edger <- function(sce, design) {
  sce %>%
    edgeR::estimateDisp(robust = TRUE) %>%
    # edgeR::calcNormFactors(method = "TMM") %>%
    # edgeR::estimateGLMTagwiseDisp(design = design) %>%
    edgeR::glmFit(design = model.matrix(design, data = sce@colData)) %>%
    edgeR::glmLRT(coef = 2) %>%
    `$`(table) %>%
    select(base = logCPM, lfc = logFC, p = PValue, LR = LR) %>%
    tidyr::drop_na() %>%
    mutate(p = p.adjust(p, "BH"), p.unadj = p) %>%
    ind2col("symbol") %>%
    list(table = ., design = design)
}
# Hacky fctn to determine colors for `lfc > 0` and `lfc < 0`
lfc_colors <- function(sce, design, colors) {
  f <- terms(design) %>% attr("factors")
  stopifnot((nrow(f) == 1) && (ncol(f) == 1))

  f <- sce@colData[[f %>% colnames()]]

  cc <- colors[as.character(f %>% levels())] # TODO: need `as.character`?
  cc <- c(cc[[1]], (rowMeans(col2rgb(cc[-1])) / 255) %>% as.list() %>% dual(rgb))

  (. %>% (colorRamp(cc)) %>% RGB())
}
compute_de <- function(sce, design) {
  list(
    deseq = suppressMessages(deseq(sce, design)),
    edger = suppressMessages(edger(sce, design))
  ) %>%
    with({
      list(
        table = rbind(
          deseq$table %>%
            select(symbol, base, lfc, p) %>%
            mutate(src = "DESeq2"),
          edger$table %>%
            select(symbol, base, lfc, p) %>%
            mutate(src = "edgeR")
        ),
        design = design
      )
    }) %>%
    within({
      # Attach LFC color (not very important)
      table %<>%
        group_by(src) %>%
        mutate(color = (lfc_colors(sce, design, colors))(percent_rank(lfc))) %>%
        ungroup()
    })
}
Example
sce_dummy.de <- sce_dummy %>% compute_de(~group)
sce_dummy.de$table %>%
  head() %>%
  rbind("...") %>%
  kable()
symbol base lfc p src color
Fos 1.94433786752507 1.03952791369199 0.0350644319817973 DESeq2 #C98144FF
Fosb 1.32188433643544 0.39269919533489 0.94357785439112 DESeq2 #A09B36FF
Junb 1.71655787188708 0.618371363877933 0.403770457436528 DESeq2 #B0913BFF
Spp1 0.685515786871741 1.75726212594778 0.121724809299488 DESeq2 #E4704DFF
Col6a1 0.644138066638724 0.767376344078733 0.324044869548654 DESeq2 #B68D3DFF
Col1a1 2.86899094589606 1.87748518409641 1.75462356248159e-09 DESeq2 #E86E4EFF
Pretty-printing
show_de_table <- function(.) {
  (.) %>%
    group_by(src) %>%
    slice_min(p, n = 777) %>%
    slice_max(abs(lfc), n = 777) %>%
    ungroup() %>%
    merge(
      y = org.Mm.eg.db::org.Mm.egSYMBOL2EG %>% as.data.frame(),
      all.x = TRUE,
      by = "symbol"
    ) %>%
    merge(
      y = gene.summary$mm %>% select(gene_id, description, summary),
      all.x = TRUE,
      by = "gene_id"
    ) %>%
    select(-any_of("gene_id")) %>%
    select(-any_of("color")) %>%
    # Role: TF, Rec, Lig, etc
    mutate(role = symbol2role(symbol)) %>%
    mutate(symbol = as.link(symbol)) %>%
    mutate(description = if_else(is.na(summary), description, paste0("<details>", "<summary>", description, "</summary>", summary, "</details>"))) %>%
    select(-summary) %>%
    mutate(description = paste0("<span style='font-size: 70%;'>", description, "</span>")) %>%
    DT::datatable(escape = FALSE) %>%
    DT::formatSignif(c("base", "lfc", "p"), digits = 3)
}
Example
sce_dummy.de$table %>%
  group_by(src) %>%
  slice_min(p, n = 33) %>%
  ungroup() %>%
  show_de_table()
As heatmap
show_de_heatmap <- function(sce, de_table, n = 33, ...) {
  top <- de_table %>%
    group_by(lfc > 0) %>%
    slice_min(p, n = n, with_ties = FALSE) %>%
    ungroup() %>%
    arrange(lfc) %>%
    select(symbol, lfc, p) %>%
    mutate(`-log10(p)` = -log10(p)) %>%
    by_col1() %>%
    `[`(rownames(.) %in% rownames(sce), )

  sce[rownames(top), ] %>%
    heatmap(anno_row = top, ...)
}
Example
sce_dummy %>%
  show_de_heatmap(sce_dummy.de$table %>% filter(src == "edgeR"), colors = colors)

Volcano plot
show_de_volcano <- function(de_table) {
  de_table %<>%

    mutate(goi_set = symbol2goiset(symbol)) %>%
    group_by(src) %>%
    arrange(lfc) %>%
    mutate(is_out = (lfc <= max(head(lfc, 5))) | (min(tail(lfc, 5)) <= lfc)) %>%
    ungroup() %>%
    mutate(label = ifelse(is_out, symbol, ""))

  ggplot(de_table, aes(x = lfc, y = -log10(p))) +

    # All genes
    geom_point(color = "gray") +

    # Color by geneset
    geom_point(
      data = de_table %>% filter(goi_set != "Other"),
      aes(x = lfc, y = -log10(p), color = goi_set)
    ) +
    scale_color_manual(values = ramp(de_table$goi_set)) +
    labs(color = "Gene set") +

    # Label outliers
    #geom_text(aes(label = label), hjust = -0.3, position = position_jitter(h = 4), size = 3, color = "blue") +
    ggrepel::geom_text_repel(
      aes(label = label),
      hjust = -0.3, size = 2, alpha = 1, show.legend = FALSE,
      max.overlaps = Inf, 
      point.size = NA,
      color = "blue",
      segment.alpha = 0.2
    ) +
    
    facet_grid(. ~ src, scales = "free") +
    theme(legend.position = "bottom")
}
Example
sce_dummy.de$table %>% show_de_volcano()

Expression vs LFC plot
show_de_basecamp <- function(.) {
  (.) %>%
    mutate(goi_set = symbol2goiset(symbol)) %>%
    
    group_by(goi_set) %>%
    arrange(lfc) %>%
    mutate(is_out = (lfc <= max(head(lfc, 5))) | (min(tail(lfc, 5)) <= lfc)) %>%
    mutate(is_out = is_out | (min(tail(sort(base), 5)) <= base)) %>%
    ungroup() %>%
    
    mutate(label = ifelse(is_out, symbol, "")) %>%
    
    ggplot(aes(x = lfc, y = base)) +
    
    geom_point(data = (.), alpha = 0.5, size = 1, color = "grey") +
    geom_point(color = "black", size = 0.5, alpha = 0.3) +
    scale_y_log10() +
    
    
    # geom_text(aes(label = label), hjust = -0.3, position = position_jitter(h = 0.01), size = 2, color = "blue") +
    ggrepel::geom_text_repel(
      aes(label = label),
      hjust = -0.3, size = 2, show.legend = FALSE,
      max.overlaps = Inf, point.size = NA,
      segment.alpha = 0.2,
      color = "blue"
    ) +

    facet_wrap(~goi_set, scale = "free_y") +

    # Note: put labs(x = "..") outside
    labs(y = "Expression") +
    theme(
      legend.position = "bottom",
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
    )
}
Example
sce_dummy.de$table %>%
  filter(src == "edgeR") %>%
  show_de_basecamp() +
  labs(x = "Condition1 <-- LFC --> Condition2")

Comparing LFC of DESeq2 vs edgeR
show_de_cmp_lfc <- function(table) {
  merge(
    table %>% filter(src == "DESeq2"),
    table %>% filter(src == "edgeR"),
    by = "symbol",
    all = TRUE
  ) %>%
    # tidyr::replace_na(list(p.x = 0, p.y = 0)) %>%
    tidyr::drop_na() %>%
    mutate(sig.x = (p.x < quantile(p.x, 0.01))) %>%
    mutate(sig.y = (p.y < quantile(p.y, 0.01))) %>%
    mutate(sig = if_else(sig.x & sig.y, "Intersection", if_else(sig.x, "DESeq", if_else(sig.y, "edgeR", "")))) %>%
    mutate(label = if_else(sig.x | sig.y, symbol, "")) %>%
    {
      ggplot((.) %>% filter(sig != ""), aes(x = lfc.x, y = lfc.y)) +
        geom_point(data = (.) %>% filter(sig == ""), stroke = 0, alpha = 0.15, size = 1) +
        geom_point(aes(color = sig), alpha = 0.5, size = 1) +
        labs(x = "LFC by DESeq2", y = "LFC by edgeR", color = "Top 1% signif.") +
        
        #geom_text(aes(label = label, color = sig), hjust = -0.3, position = position_jitter(h = 0.05), size = 3, alpha = 0.8, show.legend = FALSE) +
        ggrepel::geom_text_repel(
          aes(label = label, color = sig),
          hjust = -0.3, size = 2, alpha = 0.8, show.legend = FALSE,
          max.overlaps = 20, 
          point.size = NA,
          segment.alpha = 0.2
        ) +
        
        geom_abline(slope = 1, intercept = 0, linetype = "dashed")
    }
}
Example
sce_dummy.de$table %>% show_de_cmp_lfc()

GO

Computation
compute_go <- function(de_table, n = 33) {
  de_table %>%
    mutate(direction = if_else(lfc > 0, "up", "dn")) %>%
    group_by(direction) %>%
    slice_min(p, n = n, with_ties = TRUE) %>%
    slice_max(abs(lfc), n = n, with_ties = FALSE) %>%
    split(.$direction) %>%
    within({
      f <- (function(x) x$symbol[1:5] %>% paste(collapse = ", "))
      message(paste("Up:", f(up), "..."))
      message(paste("Dn:", f(dn), "..."))
      rm(f)
    }) %>%
    lapply(function(de_table) {
      de_table %>%
        # Get the Entrez IDs of our genes
        mutate(gene_id = symbol2geneid(symbol)) %>%
        pull(gene_id) %>%
        # GO enrichment analysis
        limma::goana(species = "Mm") %>%
        ind2col("category") %>%
        rename(p = P.DE, `#DE` = DE, `#cat` = N, GO = Ont, term = Term) %>%
        # Limit selection
        filter(p <= 0.05) %>%
        arrange(p)
    })
}
Pretty-printing
show_go_table <- function(.) {
  (.) %>%
    with({
      rbind(
        up %>% mutate(direction = "lfc_up"),
        dn %>% mutate(direction = "lfc_down")
      ) %>%
        arrange(p)
    }) %>%
    mutate(category = if_else(grepl("^GO:", category), as.link(category), category)) %>%
    mutate(category = paste0("<span style='white-space:nowrap;'>", category, "(", GO, ")", "</span>")) %>%
    mutate(direction = if_else(grepl("lfc_u", direction), paste0("<span style='color:darkred'>", direction, "</span>"), direction)) %>%
    mutate(direction = if_else(grepl("lfc_d", direction), paste0("<span style='color:darkgreen'>", direction, "</span>"), direction)) %>%
    dplyr::select(any_of(c("category", "direction", "p", "#DE", "#cat", "term"))) %>%
    DT::datatable(escape = FALSE, width = "100%") %>%
    DT::formatSignif("p", digits = 3)
}
Example
sce_dummy.go <-
  sce_dummy.de$table %>%
  compute_go()
## Up: Col4a2, Col4a1, Ier3, Col18a1, Col8a1 ...
## Dn: Col9a2, Pdhb, Col9a2, Ddit4, Col4a6 ...
rbind(
  sce_dummy.go$up %>% head(2),
  sce_dummy.go$dn %>% head(2),
  "..."
) %>%
  DT::datatable(escape = FALSE, width = "100%")
sce_dummy.go %>% show_go_table()
Gene-categorty associations
go_info <- list(
  tax = list(mm = path_to("*/*/*/*_mouse_sym2cat.txt.gz") %>% read.csv(sep = "\t")),
  obo = path_to("*/*/*/obo_go.txt.gz") %>% read.csv(sep = "\t")
)
go_info$tax$mm %>%
  head() %>%
  kable()
symbol goid
0610009B22Rik GO:0003674
0610009B22Rik GO:0006888
0610009B22Rik GO:0048471
0610009B22Rik GO:0005634
0610009B22Rik GO:0005737
0610009B22Rik GO:0030008
go_info$obo %>%
  head() %>%
  kable()
goid term namespace
GO:0000001 mitochondrion inheritance biological_process
GO:0000002 mitochondrial genome maintenance biological_process
GO:0000003 reproduction biological_process
GO:0000006 high-affinity zinc transmembrane transporter activity molecular_function
GO:0000007 low-affinity zinc ion transmembrane transporter activity molecular_function
GO:0000009 alpha-1,6-mannosyltransferase activity molecular_function
go2symbols <- function(go_term) {
  with(
    go_info,
    merge(tax$mm, filter(obo, stringr::str_detect(term, go_term)), by = "goid") %>%
      pull(symbol) %>%
      unique()
  )
}
# example: "inflammatory"
list(
  sce = sce_dummy,
  genes = go2symbols("infla"),
  table = sce_dummy.de$table %>% filter(src == "edgeR")
) %>%
  with(
    sce %>%
      .[, order(.$subgroup)] %>%
      show_de_heatmap(table %>% filter(symbol %in% genes), colors = colors, cluster_cols = FALSE)
  )

Misc

Cosine similarity
# If A and B are SingleCellExperiment-like, then `cosine` computes
# the cosine similarity of samples of A against samples of B
cosine <- function(A, B) {
  A <- A@assays@data$counts
  B <- B@assays@data$counts
  ii <- intersect(rownames(A), rownames(B))
  A <- t(as.matrix(A[ii, , drop = FALSE]))
  B <- t(as.matrix(B[ii, , drop = FALSE]))
  A <- A / sqrt(rowSums(A * A))
  B <- B / sqrt(rowSums(B * B))
  A %*% t(B)
}

Example:

cosine(
  sce_dummy[, sce_dummy$subgroup == "healthy1"],
  sce_dummy[, sce_dummy$subgroup == "healthy1"]
) %>%
  pheatmap::pheatmap()

Co-expression

Hand-made
coex <- function(sce) {
  # Filter for the correlation matrix
  f <- function(.) ((.) != 0)

  sce %>%
    SingleCellExperiment::counts() %>%
    t() %>%
    # stats::cor(method = "spearman") %>%
    Hmisc::rcorr(type = "spearman") %>%
    `$`(r) %>%
    {
      with(
        (upper.tri(.) * (.)) %>% f() %>% which(arr.ind = TRUE) %>% as.data.frame(),
        data.frame(a = rownames(.)[row], b = colnames(.)[col], w = (.)[cbind(row, col)])
      )
    }
}
show_coex_graph <- function(coex_df, de_table, n = 77) {
  coex_df %<>% as.data.frame()

  # Note: the graph vertices are pulled from the de_table
  # Only those occurring in coex_df$a or .$b are retained

  stopifnot(!any(duplicated(de_table$symbol)))

  # Color of vertex labels
  # de_table %>% pull(color, symbol)

  stopifnot(all(c("a", "b", "w") %in% colnames(coex_df)))

  # Symbol -> LFC map
  node_lfc <- de_table %>% pull(lfc, symbol)

  # Attach edge color
  coex_df %<>%
    group_by(w > 0) %>%
    mutate(color = ((1 + w / max(abs(w))) / 2)) %>%
    ungroup() %>%
    mutate(color = colorRamp(c("blue", "yellow", "red"))(color) %>% RGB(alpha = 0.2))

  # Co-expression graph
  ex_graph <-
    coex_df %>%
    # Take top `n` co-expression edges at both extremes
    group_by(w > 0) %>%
    slice_max(abs(w), n = n) %>%
    ungroup() %>%
    select(a, b, w, color) %>%
    igraph::graph_from_data_frame(
      directed = FALSE,
      vertices = (
        de_table %>%
          filter((symbol %in% coex_df$a) | (symbol %in% coex_df$b)) %>%
          select(name = symbol, color)
      )
    )

  graphs <- list(
    # Proto-graphs based on OmniPath
    tf = omnipath.tf,
    lr = omnipath.lr,
    ia = omnipath.in %>%
      anti_join(omnipath.tf, by = c("source", "target")) %>%
      anti_join(omnipath.lr, by = c("source", "target"))
  ) %>%
    # Convert them to `igraph` objects
    lapply(function(df) {
      df %>%
        mutate(W = is_stimulation - is_inhibition) %>%
        select(a = source_genesymbol, b = target_genesymbol, W) %>%
        filter(a %in% igraph::V(ex_graph)$name) %>%
        filter(b %in% igraph::V(ex_graph)$name) %>%
        group_by(a, b) %>%
        summarize(W = sum(W), .groups = "keep") %>%
        filter(W != 0) %>%
        # Attach co-expression `w` and edge color
        merge(coex_df %>% select(a, b, w, color), by = c("a", "b")) %>%
        # Ensure correct order
        select(a, b, w, W, color) %>%
        igraph::graph_from_data_frame(directed = TRUE)
    }) %>%
    within({
      # Attach the co-expression graph
      ex <- ex_graph

      # For the co-expression graph, by definition:
      # `W` is positive iff both LFC are of the same sign
      edges <- ex %>% igraph::as_data_frame("edges")
      igraph::E(ex)$W <- sign(node_lfc[edges$from] * node_lfc[edges$to])
      rm(edges)
    }) %>%
    lapply(function(g) {
      igraph::E(g)$coherent <- (sign(igraph::E(g)$w) == sign(igraph::E(g)$W))
      return(g)
    })

  rm(ex_graph)

  # for layouting
  set.seed(41)

  graph_layout <-
    (
      graphs$ex
        # + igraph::as.undirected(graphs$tf)
        # + igraph::as.undirected(graphs$lr)
        + igraph::as.undirected(graphs$ia)
    ) %>%
    # igraph::layout_with_kk(dim = 2, kkconst = igraph::vcount(.)) %>%
    igraph::layout_with_dh(maxiter = 22) %>%
    # igraph::layout_with_lgl() %>%
    # igraph::layout_with_fr(start.temp = 1e-1) %>%
    as.data.frame() %>%
    magrittr::set_colnames(c("x", "y")) %>%
    magrittr::set_rownames(igraph::V(graphs$ex)$name)

  graphs %<>%
    lapply(function(graph) {
      list(graph = graph, pos = graph_layout) %>% within({
        # Edges as dataframe
        edges <-
          graph %>%
          igraph::get.edgelist() %>%
          magrittr::set_colnames(c("a", "b")) %>%
          as.data.frame()

        # Attach segment data
        segment <-
          cbind(
            take_rows(pos, edges$a) %>% magrittr::set_colnames(c("x", "y")),
            take_rows(pos, edges$b) %>% magrittr::set_colnames(c("xend", "yend"))
          ) %>%
          mutate(
            x = x + (xend - x) * 0.02, xend = xend - (xend - x) * 0.03,
            y = y + (yend - y) * 0.02, yend = yend - (yend - y) * 0.03
          )
      })
    })

  list(
    slide1 = list(
      alpha = list(ex = 2e-2, tf = 0.77, lr = 3e-2, ia = 3e-2),
      title = "TF -> target"
    ),
    slide2 = list(
      alpha = list(ex = 2e-2, tf = 3e-2, lr = 0.77, ia = 3e-2),
      title = "Ligand -> receptor"
    ),
    slide3 = list(
      alpha = list(ex = 2e-2, tf = 3e-2, lr = 3e-2, ia = 0.77),
      title = "More protein-protein interaction"
    ),
    slide4 = list(
      alpha = list(ex = 0.55, tf = 3e-2, lr = 3e-2, ia = 3e-2),
      title = "Co-expression"
    )
  ) %>% lapply(function(slide) {
    p <- ggplot()

    p <- p +
      # Co-expression segments
      geom_segment(
        data = graphs$ex$segment,
        aes(x = x, y = y, xend = xend, yend = yend),
        color = igraph::E(graphs$ex$graph)$color,
        alpha = slide$alpha$ex,
        size = 0.1,
        linetype = if_else(igraph::E(graphs$ex$graph)$coherent, "solid", "dashed")
      )

    for (x in c("tf", "lr", "ia")) {
      E <- igraph::E(graphs[[x]]$graph)
      p <- p +
        geom_curve(
          data = graphs[[x]]$segment,
          lineend = "butt", # linejoin = "mitre",
          aes(x = x, y = y, xend = xend, yend = yend),
          arrow = arrow(
            length = unit(0.007, "npc"),
            # activation / inhibition arrow:
            angle = if_else(E$W > 0, 10, if_else(E$W < 0, 90, 0))
          ),
          color = E$color,
          alpha = slide$alpha[[x]],
          size = 0.2,
          curvature = 0.05,
          linetype = if_else(E$coherent, "solid", "dashed")
        )
    }

    p <- p +
      # Labels
      geom_text(
        data = graph_layout,
        aes(x = x, y = y, label = igraph::V(graphs$ex$graph)$name),
        size = 2,
        color = igraph::V(graphs$ex$graph)$color
      )

    p <- p +
      theme_void() +
      ggplot2::ggtitle(slide$title) +
      theme(plot.title = element_text(hjust = 0.95, vjust = -3, size = 10))

    return(p)
  })

  # graph %>%
  #   igraph::plot.igraph(
  #     edge.width = 0.7,
  #     edge.color = igraph::E(.)$color,
  #
  #     vertex.size = 0,
  #     vertex.shape = 'none',
  #     #vertex.color = "black",
  #     vertex.label.cex = 0.4,
  #     vertex.label.color = vcolor[igraph::V(.)$name],
  #
  #     layout = graph_layout
  #   )
}
sce_dummy %>%
  `[`(, (.)$group == "healthy") %>%
  take_rows(get_goi_genes()) %>%
  coex() %>%
  show_coex_graph(sce_dummy.de$table %>% filter(src == "edgeR"))

visNetwork
requireNamespace("visNetwork")
## Loading required namespace: visNetwork
omnipath.tf %>%
  mutate(a = source_genesymbol, b = target_genesymbol) %>%
  filter(a %in% get_goi_genes(), b %in% get_goi_genes()) %>%
  select(a, b) %>%
  igraph::graph_from_data_frame(directed = FALSE) %>%
  visNetwork::toVisNetworkData() %>%
  {
    visNetwork::visNetwork(nodes = .$nodes, edges = .$edges, layout = "layout_with_kk") %>%
      visNetwork::visPhysics(enabled = FALSE)
  }
Scenic
# scenic_options <-
#   SCENIC::initializeScenic(org = "mgi", dbDir = path_to("data/SCENIC/*cache"), dbs = SCENIC::defaultDbNames[["mgi"]], datasetTitle = "dummy", nCores = 1)
# sce_dummy@assays@data$counts %>%
#   list(counts = .) %>% with({
#     counts %>%
#       `[`(SCENIC::geneFiltering(., scenicOptions = scenic_options), ) %>%
#       #SCENIC::runCorrelation(scenic_options) %>%
#       log1p() %>%
#       SCENIC::runGenie3(scenic_options)
#   })
#
#
# `%dopar%` <- foreach::`%dopar%`
# foreach <- foreach::foreach
# scenic_options %>%
#   SCENIC::runSCENIC_1_coexNetwork2modules() %>%
#   SCENIC::runSCENIC_2_createRegulons() %>%
#   SCENIC::runSCENIC_3_scoreCells(log1p(sce_dummy@assays@data$counts))

Remove unused.

rm(cco_revelio)
gene.summary$hs <- NULL
gc()
##            used (Mb) gc trigger  (Mb) max used  (Mb)
## Ncells  9417162  503   16351961 873.3 16351961 873.3
## Vcells 25156436  192   42660394 325.5 35483662 270.8

RAM usage.

culprits() %>%
  head(10) %>%
  kable()
size cumsize unit
omnipath.tf 66.00 170.0 Mb
omnipath.in 41.00 110.0 Mb
gene.summary 39.00 66.0 Mb
go_info 16.00 28.0 Mb
omnipath.lr 4.10 12.0 Mb
show_coex_graph 1.60 8.1 Mb
sce_dummy 0.52 6.5 Mb
sce_dummy.go 0.48 6.0 Mb
goi_agg_by_group 0.45 5.5 Mb
heatmap 0.37 5.1 Mb

Single-cell datasets

>

Prepare

Loader list
# Collection of dataset loaders
load_dataset <- list()
Summarizer
# Summarizes each column in `cols`
# Call with results='asis'
tables <- function(sce, cols) {
  for (col in cols) {
    sce@colData %>%
      as.data.frame() %>%
      pull(col) %>%
      table(useNA = "ifany") %>%
      as.data.frame() %>%
      rename_with(~col, ".") %>%
      rename_with(~"#", Freq) %>%
      t() %>%
      kable() %>%
      print()

    # https://bookdown.org/yihui/rmarkdown-cookbook/kable.html#generate-multiple-tables-from-a-for-loop
    # https://stackoverflow.com/questions/52631689/how-to-show-formatted-r-output-with-results-asis-in-rmarkdown
  }
}

Loaders

Allen Brain, 2020

About

“Mouse Whole Cortex and Hippocampus 10x” dataset from Allen Brain, 2020: [explore], [download], [protocols].

The dataset contains ~1M cells. We have selected ~31k cells of type “Astro”, “Endo” and “VLMC”.

Loader
load_dataset$ab <- function(max.size = 9999) {
  sce <-
    make_sce(
      counts = (
        path_to("*/AllenBrain/Mouse-WCH*/*fewer_cells/data.*.gz") %>%
          from_tsv() %>%
          `[`(, sort(colnames(.)))
      ),
      coldata = (
        path_to("*/AllenBrain/Mouse-WCH*/f*cells/meta.*.gz") %>%
          from_tsv() %>%
          `[`(sort(rownames(.)), )
      )
    ) %>%
    # Only keep VLMC
    `[`(, (.)$subclass_label == "VLMC") %>%
    # Select a random subset of cells for expediency
    `[`(, sample(colnames(.), size = min(ncol(.), max.size))) %>%
    # Drop non-expressed cells
    `[`(, (.)@assays@data$counts %>% colSums() > 0) %>%
    # Order by cell type: sometimes convenient below, esp. for plotting
    `[`(, order((.)$cell_type_alias_label))

  # conversion to matrix takes long, hence do it here
  sce@assays@data$counts %<>% as.matrix()

  # alias `celltype` column
  sce@colData$celltype <- sce@colData$cell_type_alias_label

  sce@colData$celltype %<>% as.factor() %>% relevel(ref = "375_VLMC")

  return(sce)
}

Betsholtz, 2018

About

GSE98816: single cell RNA-seq of mouse brain vascular transcriptomes.

Loader
load_dataset$bh <- function(max.size = 9999) {
  sce <-
    make_sce(
      counts = path_to("*/Betsholtz-2018/*/expr.*.gz") %>%
        from_tsv() %>%
        t(),
      coldata = path_to("*/Betsholtz-2018/*/meta.*.gz") %>% from_tsv()
    ) %>%
    # Only keep FB
    `[`(, (.)$celltype %in% c("FB1", "FB2")) %>%
    # Select a random subset of cells for expediency
    `[`(, sample(colnames(.), size = min(ncol(.), max.size))) %>%
    # Drop non-expressed cells
    `[`(, (.)@assays@data$counts %>% colSums() > 0) %>%
    # Order by cell type
    `[`(, order((.)$celltype))

  sce@assays@data$counts %<>% as.matrix()

  sce@colData$celltype %<>% as.factor() %>% relevel(ref = "FB1")

  return(sce)
}

Castelo-Branco, 2018

About

Raw data from GSE113973 (last update: Nov 13, 2019), metadata from UCSC cell browser.

Other links: web app, paper.

Loader
load_dataset$cb <- function(max.size = 9999) {
  sce <-
    make_sce(
      counts = path_to("*/CasteloBranco-2018/*/expr.*.gz") %>%
        from_tsv() %>%
        t(),
      coldata = path_to("*/CasteloBranco-2018/*/meta.*.gz") %>%
        from_tsv()
    ) %>%
    # Only keep those cells
    `[`(, (.)$celltype %in% c("VLMC1", "VLMC2", "VLMC3", "VLMC4")) %>%
    # Select a random subset of cells for expediency
    `[`(, sample(colnames(.), size = min(ncol(.), max.size))) %>%
    # Drop non-expressed cells
    `[`(, (.)@assays@data$counts %>% colSums() > 0) %>%
    # Order by cell type
    `[`(, order((.)$celltype))

  sce@assays@data$counts %<>% as.matrix()

  sce@colData$Group %<>% as.factor() %>% relevel(ref = "Ctrl")
  sce@colData$celltype %<>% as.factor() %>% relevel(ref = "VLMC3")

  return(sce)
}

Daneman, 2021

About

Expression data from GSE135186 (last update: Feb 10, 2021). The cell-to-cluster assignment is a private communication by D. Aran (2021-05-20). The associated paper is Dorrier-Aran-…-Daneman, 2021.

Loader
load_dataset$dm <- function(max.size = 9999) {
  sce <-
    make_sce(
      counts = path_to("*/Daneman-2020/*/expr.*.gz") %>%
        from_tsv() %>%
        t(),
      coldata = path_to("*/Daneman-2020/*/meta.*.gz") %>%
        from_tsv()
    ) %>%
    # Select a random subset of cells for expediency
    `[`(, sample(colnames(.), size = min(ncol(.), max.size))) %>%
    # Drop non-expressed cells (there shouldn't be any here)
    `[`(, (.)@assays@data$counts %>% colSums() > 0) %>%
    # Order samples
    `[`(, order((.)$cluster))

  # Drop cells without cluster (they separate in dim. red. plots)
  sce %<>% `[`(, !is.na((.)$cluster))

  sce@assays@data$counts %<>% as.matrix()

  # Set factor reference level
  sce@colData$group %<>% as.factor() %>% relevel(ref = "healthy")
  sce@colData$cluster %<>% c() %>%
    as.factor() %>%
    relevel(ref = "0")

  # Alias for consistency
  sce@colData$celltype <- sce@colData$cluster

  return(sce)
}

Load all

Read from disk
datasets <- load_dataset %>% lapply(\(loader) memo(loader)(max.size = 777))
# Note: Don't cache but memoize here
Dealias gene symbols
datasets %<>% lapply(dealias_sce)
## Warning in limma::alias2SymbolTable(., species = species): Multiple symbols
## ignored for one or more aliases
## Warning in is.na(.) %>% {: Dealiasing with `limma` genes resulted in 3979 NAs,
## e.g. at: Gm28402, Gm37635, Gm48945, Gm26969, AC155941.1
## Warning in limma::alias2SymbolTable(., species = species): Multiple symbols
## ignored for one or more aliases
## Warning in is.na(.) %>% {: Dealiasing with `limma` genes resulted in 137 NAs,
## e.g. at: Rims1.1, LOC102633315, AI848285, AF529169, Snord53.2
## Warning in limma::alias2SymbolTable(., species = species): Multiple symbols
## ignored for one or more aliases
## Warning in is.na(.) %>% {: Dealiasing with `limma` genes resulted in 190
## NAs, e.g. at: LOC100862268, Cypt9, Duxbl1+Duxbl2+Duxbl3, LOC101056149,
## Mir692-2b+Mir692-3
## Warning in limma::alias2SymbolTable(., species = species): Multiple symbols
## ignored for one or more aliases
## Warning in is.na(.) %>% {: Dealiasing with `limma` genes resulted in 2904 NAs,
## e.g. at: Gm37875, Gm29069, Gm42836, RP23-328F4.7, Gm43562
Attach reduced dimensions
datasets <- datasets %>% lapply(scater_attach)
# memo: `%<>%` doesn't seem to work with cache=TRUE

First look

Allen Brain

Cell type / Donor
gridExtra::grid.arrange(
  datasets$ab %>% scater_show(color.by = "celltype", colors = colors),
  datasets$ab %>% scater_show(color.by = "donor_sex_label"),
  ncol = 2
)

Betsholtz

Cell type / Mouse ID
gridExtra::grid.arrange(
  datasets$bh %>% scater_show(color.by = "celltype", colors = colors),
  datasets$bh %>% scater_show(color.by = "Mouse ID"),
  ncol = 2
)

Castelo-Branco

Cell type / Group
gridExtra::grid.arrange(
  datasets$cb %>% scater_show(color.by = "celltype", colors = colors),
  datasets$cb %>% scater_show(color.by = "Group", colors = colors),
  ncol = 2
)

Mouse model / Plate
gridExtra::grid.arrange(
  datasets$cb %>% scater_show(color.by = "MouseModel", colors = colors),
  datasets$cb %>% scater_show(color.by = "Plate", colors = colors),
  ncol = 2
)

Daneman

Subgroup / Cluster
gridExtra::grid.arrange(
  datasets$dm %>% scater_show(color.by = "subgroup", colors = colors),
  datasets$dm %>% scater_show(color.by = "cluster", colors = colors),
  ncol = 2
)

Remove unused.

gc()
##            used  (Mb) gc trigger  (Mb) max used  (Mb)
## Ncells  9506831 507.8   16351961 873.3 16351961 873.3
## Vcells 25449956 194.2   65999344 503.6 60593127 462.3

RAM usage.

culprits() %>%
  head(10) %>%
  kable()
size cumsize unit
datasets 470.00 640.0 Mb
omnipath.tf 66.00 170.0 Mb
omnipath.in 41.00 110.0 Mb
gene.summary 39.00 67.0 Mb
go_info 16.00 28.0 Mb
omnipath.lr 4.10 12.0 Mb
show_coex_graph 1.60 8.3 Mb
sce_dummy 0.52 6.8 Mb
sce_dummy.go 0.48 6.2 Mb
goi_agg_by_group 0.45 5.8 Mb

Analysis of each

>

Dm clusters

The following is an approximation of Extended Data Fig 4c from Dorrier-Aran-…-Daneman, 2021.

We check whether other datasets (besides “Daneman”) cluster by their cell types using the same set of genes.

Daneman

The samples are presorted by “Daneman” cluster and by condition within each cluster.

datasets$dm %>%
  {
    # Remove the `celltype` columns (same as `cluster`)
    .@colData %<>% `[`(, colnames(.) != "celltype", drop = FALSE)
    # Order samples
    .[, order(paste(.$cluster, .$subgroup))]
  } %>%
  show_dm_heatmap(colors = colors, cluster_cols = FALSE)

Allen Brain

The samples are clustered.

datasets$ab %>% show_dm_heatmap(colors = colors, cluster_cols = TRUE)

Betsholtz

The samples are clustered.

datasets$bh %>% show_dm_heatmap(colors = colors, cluster_cols = TRUE)

Castelo-Branco

The samples are clustered.

datasets$cb %>% show_dm_heatmap(colors = colors, cluster_cols = TRUE)

Cell cycle

Compute

cco <- list(
  ab = datasets$ab %>% cell_cycle(batch.by = "celltype"),
  
  bh = datasets$bh %>% cell_cycle(batch.by = "celltype"),
  
  cb = datasets$cb %>% cell_cycle(batch.by = "celltype"),
  
  dm_h12 = datasets$dm %>%
    .[, .$subgroup %in% c("healthy1", "healthy2")] %>%
    cell_cycle(batch.by = "subgroup"),
  
  dm_h12_0 = datasets$dm %>%
    .[, .$subgroup %in% c("healthy1", "healthy2")] %>%
    .[, .$cluster %in% c(0)] %>%
    cell_cycle(batch.by = "subgroup")
)
## 2021-07-08 20:15:05: calculating optimal rotation: 2021-07-08 20:15:05: calculating PCA: 2021-07-08 20:15:05: assigning cell cycle phases: 2021-07-08 20:15:05: reading data: 0.32secs
## 1.82secs
## 3.17secs
## 11.02secs
## 2021-07-08 20:15:16: calculating optimal rotation: 2021-07-08 20:15:16: calculating PCA: 2021-07-08 20:15:16: assigning cell cycle phases: 2021-07-08 20:15:16: reading data: 0.17secs
## 1.51secs
## 2.02mins
## 2.34mins
## 2021-07-08 20:17:37: calculating optimal rotation: 2021-07-08 20:17:37: calculating PCA: 2021-07-08 20:17:37: assigning cell cycle phases: 2021-07-08 20:17:37: reading data: 0.2secs
## 1.84secs
## 2.35mins
## 2.53mins
## 2021-07-08 20:20:09: calculating optimal rotation: 2021-07-08 20:20:09: calculating PCA: 2021-07-08 20:20:09: assigning cell cycle phases: 2021-07-08 20:20:09: reading data: 1.51secs
## 4.93secs
## 6.62secs
## 14.95secs
## 2021-07-08 20:20:24: calculating optimal rotation: 2021-07-08 20:20:24: calculating PCA: 2021-07-08 20:20:24: assigning cell cycle phases: 2021-07-08 20:20:24: reading data: 0.2secs
## 0.86secs
## 1.28secs
## 32.22secs

Allen Brain

cco$ab %>% cell_cycle_plot2d(colors = colors)

Betsholtz

cco$bh %>% cell_cycle_plot2d(colors = colors)

Castelo-Branco

cco$cb %>% cell_cycle_plot2d(colors = colors)

Daneman

Healthy 1+2
cco$dm_h12 %>% cell_cycle_plot2d(colors = colors)

Healthy 1+2, cluster 0
cco$dm_h12_0 %>% cell_cycle_plot2d(colors = colors)

Gene blobs

Allen Brain

By celltype
datasets$ab %>% show_goi_pca(goi$pvf$genes, group.by = "celltype", colors = colors)

Betsholtz

By celltype
datasets$bh %>% show_goi_pca(goi$pvf$genes, group.by = "celltype", colors = colors)

Castelo-Branco

By condition
datasets$cb %>% show_goi_pca(goi$pvf$genes, group.by = "Group", colors = colors)

By celltype
datasets$cb %>% show_goi_pca(goi$pvf$genes, group.by = "celltype", colors = colors)

Daneman

By condition
datasets$dm %>% show_goi_pca(goi$pvf$genes, group.by = "subgroup", colors = colors)

By cluster
datasets$dm %>% show_goi_pca(goi$pvf$genes, group.by = "cluster")

Geneset ratio

Click on the plot to scroll through the comparisons (shift-click to cycle back).

Prepare

pairs.to.contrast <-
  list(
    list(A = goi$col$name, B = goi$air$name),
    list(A = goi$glycolysis$name, B = goi$air$name),
    list(A = goi$fib_mig$name, B = goi$air$name)
  )
show_ab_density <- function(sce, group.by = "celltype", colors = ramp(sce@colData[[group.by]])) {
  stopifnot(
    all(sce@colData[[group.by]] %in% names(colors))
  )

  sce %<>%
    scater::aggregateAcrossFeatures(
      ids = symbol2goiset(rownames(.)),
      average = TRUE,
      use.assay.type = "logcounts"
    )

  pairs.to.contrast %>%
    lapply(dual(function(A, B) {
      data.frame(
        a = sce@assays@data$logcounts[A, ],
        b = sce@assays@data$logcounts[B, ],
        g = sce@colData[[group.by]]
      ) %>% {
        (.) %>%
          ggplot(aes(x = (a - b), color = g)) +
          labs(x = paste0(" - ", B, " + ", A)) +
          labs(y = "Fraction of cells (i.e., density)") +
          labs(color = group.by) +
          scale_color_manual(values = colors, breaks = gglast(colour)) +
          # ggtitle(paste0(A, " vs ", B)) +
          geom_density(size = 3)
      }
    }))
}
show_ab_2d <- function(sce, group.by = "celltype", colors = ramp(sce@colData[[group.by]])) {
  stopifnot(
    all(sce@colData[[group.by]] %in% names(colors))
  )

  # Group -- for color
  g <- sce@colData[[group.by]]

  # Library size -- for size
  s <- sce@assays@data$counts %>% colSums()

  sce %<>%
    scater::aggregateAcrossFeatures(
      ids = symbol2goiset(rownames(.)),
      average = TRUE,
      use.assay.type = "logcounts"
    )

  pairs.to.contrast %>%
    lapply(dual(function(A, B) {
      data.frame(
        x = sce@assays@data$logcounts[A, ],
        y = sce@assays@data$logcounts[B, ],
        g = g,
        s = s
      ) %>% {
        (.) %>%
          ggplot(aes(x = x, y = y, color = g, size = s)) +
          labs(x = A, y = B, color = group.by, size = "Library size") +
          scale_color_manual(values = colors, breaks = gglast(colour)) +
          geom_point(alpha = 0.6) +
          # ggtitle(paste0(A, " vs ", B)) +
          guides(shape = FALSE) +
          geom_density_2d(size = 0.3, alpha = 0.5) +
          guides(color = guide_legend(override.aes = list(alpha = 1)))
      }
    }))
}

Allen Brain

datasets$ab %>% show_ab_density(group.by = "celltype", colors = colors)

datasets$ab %>% show_ab_2d(group.by = "celltype", colors = colors)
## now dyn.load("/usr/lib/R/library/MASS/libs/MASS.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/isoband/libs/isoband.so") ...

Betsholtz

datasets$bh %>% show_ab_density(group.by = "celltype", colors = colors)

datasets$bh %>% show_ab_2d(group.by = "celltype", colors = colors)

Castelo-Branco

datasets$cb %>% show_ab_density(group.by = "celltype", colors = colors)

datasets$cb %>% show_ab_2d(group.by = "celltype", colors = colors)

Daneman: clusters

datasets$dm %>% show_ab_density(group.by = "cluster")

datasets$dm %>% show_ab_2d(group.by = "cluster")

Daneman: subgroup

datasets$dm %>% show_ab_density(group.by = "subgroup", colors = colors)

datasets$dm %>% show_ab_2d(group.by = "subgroup", colors = colors)

DE

Prep

Compute DE
de <- list(
  # Other/375_VLMC
  ab_other_vs_375 = datasets$ab %>% compute_de(~celltype),

  # FB2/FB1:
  bh_fb2_vs_fb1 = datasets$bh %>% compute_de(~celltype),

  # EAE/Ctrl (exclude the VLMC3 cells because they separate on PCA):
  cb_eae_vs_ctrl = datasets$cb %>% `[`(, (.)$celltype != "VLMC3") %>% compute_de(~Group),

  # Other/VLMC3:
  cb_other_vs_vlmc3 = datasets$cb %>% compute_de(~celltype),

  # EAE/Healthy:
  dm_eae_vs_hthy = datasets$dm %>% compute_de(~group)
)
## Warning in DESeq2::DESeqDataSet(., design = design): 49 duplicate rownames were
## renamed by adding numbers
## Warning in DESeq2::DESeqDataSet(., design = design): 29 duplicate rownames were
## renamed by adding numbers
## Warning in DESeq2::DESeqDataSet(., design = design): 47 duplicate rownames were
## renamed by adding numbers

## Warning in DESeq2::DESeqDataSet(., design = design): 47 duplicate rownames were
## renamed by adding numbers
## Warning in DESeq2::DESeqDataSet(., design = design): 54 duplicate rownames were
## renamed by adding numbers
Heatmap of genesets
show_heatmap_slides <- function(sce, de_table) {
  genesets <- list(
    list(name = "Neuron projection", genes = go2symbols("neuron projection")),
    list(name = "Cell adhesion", genes = go2symbols("cell adhesion")),
    list(name = "Fibroblast growth factor", genes = go2symbols("fibroblast growth factor")),
    list(name = "Smooth muscle", genes = go2symbols("smooth muscle")),
    list(name = "Cell communication", genes = go2symbols("cell communication")),
    list(name = "Chemotaxis", genes = go2symbols("chemotaxis")),
    list(name = "Amyloid", genes = go2symbols("amyloid")),
    list(name = "Insulin", genes = go2symbols("insulin")),
    list(name = "Actin filament", genes = go2symbols("actin filament")),
    list(name = "Apoptosis", genes = go2symbols("apoptosis")),
    goi$fib_mig,
    list(name = "Wounding", genes = go2symbols("wounding")),
    list(name = "Inflammatory", genes = go2symbols("inflammatory")),
    list(name = "Immune response", genes = go2symbols("immune response")),
    goi$neuroinflammation,
    goi$air,
    goi$glycolysis,
    goi$col,
    list(name = "Cytokine activity", genes = go2symbols("^cytokine activity")),
    list(name = "Chemokine activity", genes = go2symbols("^chemokine activity")),
    goi$eae_common_up,
    goi$FB45,
    list(name = "Most-DE genes", genes = de_table$symbol, n = 33)
  )

  # genesets <- goi[c("glycolysis", "fib_mig")]  # DEBUG

  # default max number of genes to show
  n <- 47

  genesets %>% lapply(function(geneset) {
    geneset %>%
      with({
        sub_de_table <- de_table %>% filter(symbol %in% genes)

        if (!(any(rownames(sce) %in% sub_de_table$symbol))) {
          return()
        }

        show_de_heatmap(
          sce,
          sub_de_table,
          colors = colors,
          cluster_cols = FALSE,
          n = n,
          main = name
        )
      }) %>%
      magrittr::use_series(gtable)
  })
}

AB: Other/375_VLMC

Most-DE genes
de$ab_other_vs_375$table %>% show_de_table()
Heatmaps of most-DE genes and other sets (edgeR)

Click on the image to cycle through the sets (shift-click to cycle back).

show_heatmap_slides(
  datasets$ab %>% .[, order(.$celltype)],
  de$ab_other_vs_375$table %>% filter(src == "edgeR")
)

LogCMP vs LFC from edgeR
de$ab_other_vs_375$table %>%
  filter(src == "edgeR") %>%
  show_de_basecamp() +
  labs(x = "Up in 375_VLMC <-- LFC --> Up in Other")

Genewise LFC comparison
de$ab_other_vs_375$table %>%
  show_de_cmp_lfc()
## Warning: ggrepel: 30 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Volcano plots
de$ab_other_vs_375$table %>%
  show_de_volcano() +
  labs(x = "Up in 375_VLMC <-- LFC --> Up in Other", color = "Geneset")

Bh: FB2/FB1

Most-DE genes
de$bh_fb2_vs_fb1$table %>% show_de_table()
Heatmaps of most-DE genes and other sets (edgeR)

Click on the image to cycle through the sets.

show_heatmap_slides(
  datasets$bh %>% .[, order(.$celltype)],
  de$bh_fb2_vs_fb1$table %>% filter(src == "edgeR")
)

LogCMP vs LFC from edgeR
de$bh_fb2_vs_fb1$table %>%
  filter(src == "edgeR") %>%
  show_de_basecamp() +
  labs(x = "Up in FB1 <-- LFC --> Up in FB2")

Genewise LFC comparison
de$bh_fb2_vs_fb1$table %>%
  show_de_cmp_lfc()
## Warning: ggrepel: 75 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Volcano plots
de$bh_fb2_vs_fb1$table %>%
  show_de_volcano() +
  labs(x = "Up in FB1 <-- LFC --> Up in FB2", color = "Geneset")

CB: Other/VLMC3

Most-DE genes
de$cb_other_vs_vlmc3$table %>% show_de_table()
Heatmaps of most-DE genes and other sets (edgeR)

Click on the image to cycle through the sets.

show_heatmap_slides(
  datasets$cb %>% .[, order(.$celltype)],
  de$cb_other_vs_vlmc3$table %>% filter(src == "edgeR")
)

LogCMP vs LFC from edgeR
de$cb_other_vs_vlmc3$table %>%
  filter(src == "edgeR") %>%
  show_de_basecamp() +
  labs(x = "Up in VLMC3 <-- LFC --> Up in Other")

Genewise LFC omparison
de$cb_other_vs_vlmc3$table %>%
  show_de_cmp_lfc()
## Warning: ggrepel: 27 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Volcano plots
de$cb_other_vs_vlmc3$table %>%
  show_de_volcano() +
  labs(x = "Up in VLMC3 <-- LFC --> Up in Other", color = "Geneset")

CB’: EAE/Ctrl

Most-DE genes
de$cb_eae_vs_ctrl$table %>% show_de_table()
Heatmaps of most-DE genes and other sets (edgeR)

Click on the image to cycle through the sets.

show_heatmap_slides(
  datasets$cb %>% .[, .$celltype != "VLMC3"] %>% .[, order(.$Group)],
  de$cb_eae_vs_ctrl$table %>% filter(src == "edgeR")
)

LogCMP vs LFC from edgeR
de$cb_eae_vs_ctrl$table %>%
  filter(src == "edgeR") %>%
  show_de_basecamp() +
  labs(x = "Up in Ctrl <-- LFC --> Up in EAE")

Genewise LFC comparison
de$cb_eae_vs_ctrl$table %>%
  show_de_cmp_lfc()
## Warning: ggrepel: 124 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Volcano plots
de$cb_eae_vs_ctrl$table %>%
  show_de_volcano() +
  labs(x = "Up in Ctrl <-- LFC --> Up in EAE", color = "Geneset")

Dm: EAE/Healthy

Most-DE genes
de$dm_eae_vs_hthy$table %>% show_de_table()
Heatmaps of most-DE genes and other sets (edgeR)

Click on the image to cycle through the sets.

datasets$dm %>%
  {
    # Remove the `celltype` columns (same as `cluster`)
    .@colData %<>% `[`(, colnames(.) != "celltype", drop = FALSE)
    # Order samples
    .[, order(.$subgroup)]
  } %>%
  show_heatmap_slides(
    de$dm_eae_vs_hthy$table %>% filter(src == "edgeR")
  )

LogCMP vs LFC from edgeR
de$dm_eae_vs_hthy$table %>%
  filter(src == "edgeR") %>%
  show_de_basecamp() +
  labs(x = "Healthy <-- LFC --> EAE")

Genewise LFC comparison
de$dm_eae_vs_hthy$table %>%
  show_de_cmp_lfc()
## Warning: ggrepel: 145 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Volcano plots
de$dm_eae_vs_hthy$table %>%
  show_de_volcano() +
  labs(x = "Healthy <-- LFC --> EAE", color = "Geneset")

GO

Compute

Use DE results from EdgeR.

go <-
  list(n33 = 33, n77 = 77, n150 = 150) %>% lapply(function(n) {
    de %>% lapply(function(de_result) {
      de_result$table %>%
        filter(src == "edgeR") %>%
        compute_go(n = n)
    })
  })

AB: Other/375_VLMC

go$n33$ab_other_vs_375 %>% show_go_table()

Bh: FB2/FB1

go$n33$bh_fb2_vs_fb1 %>% show_go_table()

CB: Other/VLMC3

go$n33$cb_other_vs_vlmc3 %>% show_go_table()

CB’: EAE/Ctrl

go$n33$cb_eae_vs_ctrl %>% show_go_table()

Dm: EAE/Healthy

go$n33$dm_eae_vs_hthy %>% show_go_table()

Co-expression

The straight edges are the top negative (blue) and positive (red) gene-gene Spearman correlations among the “genes of interest” within each sub-dataset. The nodes are colored by condition.

The arched edges are TF-target, ligand-receptor, protein-protein interactions from OmniPath. The color is from co-expression (yellow = neutral). The line is dashed if the observed co-expression contradicts the expected effect, e.g. inhibition is expected but positive co-expression is observed.

Click on the figure to change the view; alt-click to enlarge.

Allen Brain

All
datasets$ab %>%
  take_rows(get_goi_genes()) %>%
  coex() %>%
  show_coex_graph(de$ab_other_vs_375$table %>% filter(src == "edgeR"))

374_VLMC
datasets$ab %>%
  take_rows(get_goi_genes()) %>%
  `[`(, .$celltype == "374_VLMC") %>%
  coex() %>%
  show_coex_graph(de$ab_other_vs_375$table %>% filter(src == "edgeR"))

375_VLMC
datasets$ab %>%
  take_rows(get_goi_genes()) %>%
  `[`(, .$celltype == "375_VLMC") %>%
  coex() %>%
  show_coex_graph(de$ab_other_vs_375$table %>% filter(src == "edgeR"))

376_VLMC
datasets$ab %>%
  take_rows(get_goi_genes()) %>%
  `[`(, .$celltype == "376_VLMC") %>%
  coex() %>%
  show_coex_graph(de$ab_other_vs_375$table %>% filter(src == "edgeR"))

Betsholtz

All
datasets$bh %>%
  take_rows(get_goi_genes()) %>%
  coex() %>%
  show_coex_graph(de$bh_fb2_vs_fb1$table %>% filter(src == "edgeR"))

FB1
datasets$bh %>%
  take_rows(get_goi_genes()) %>%
  `[`(, .$celltype == "FB1") %>%
  coex() %>%
  show_coex_graph(de$bh_fb2_vs_fb1$table %>% filter(src == "edgeR"))

FB2
datasets$bh %>%
  take_rows(get_goi_genes()) %>%
  `[`(, .$celltype == "FB2") %>%
  coex() %>%
  show_coex_graph(de$bh_fb2_vs_fb1$table %>% filter(src == "edgeR"))

Castelo-Branco

All
datasets$cb %>%
  take_rows(get_goi_genes()) %>%
  coex() %>%
  show_coex_graph(de$cb_eae_vs_ctrl$table %>% filter(src == "edgeR"))

Ctrl
datasets$cb %>%
  take_rows(get_goi_genes()) %>%
  `[`(, .$Group == "Ctrl") %>%
  coex() %>%
  show_coex_graph(de$cb_eae_vs_ctrl$table %>% filter(src == "edgeR"))

EAE
datasets$cb %>%
  take_rows(get_goi_genes()) %>%
  `[`(, .$Group == "EAE") %>%
  coex() %>%
  show_coex_graph(de$cb_eae_vs_ctrl$table %>% filter(src == "edgeR"))

Daneman

All
datasets$dm %>%
  take_rows(get_goi_genes()) %>%
  coex() %>%
  show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))

Healthy1+2
datasets$dm %>%
  take_rows(get_goi_genes()) %>%
  `[`(, .$subgroup %in% c("healthy1", "healthy2")) %>%
  coex() %>%
  show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))

Healthy3
datasets$dm %>%
  take_rows(get_goi_genes()) %>%
  `[`(, .$subgroup == "healthy3") %>%
  coex() %>%
  show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))

EAE1
datasets$dm %>%
  take_rows(get_goi_genes()) %>%
  `[`(, .$subgroup == "EAE1") %>%
  coex() %>%
  show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))

EAE2
datasets$dm %>%
  take_rows(get_goi_genes()) %>%
  `[`(, .$subgroup == "EAE2") %>%
  coex() %>%
  show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))

Clean up.

gc()
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells   9547192  509.9   16351961  873.3  16351961  873.3
## Vcells 144162903 1099.9  294956956 2250.4 293935161 2242.6

RAM usage.

culprits() %>%
  head(10) %>%
  kable()
size cumsize unit
datasets 470.00 670.0 Mb
omnipath.tf 66.00 200.0 Mb
omnipath.in 41.00 130.0 Mb
gene.summary 39.00 91.0 Mb
de 17.00 52.0 Mb
go_info 16.00 35.0 Mb
go 5.80 20.0 Mb
omnipath.lr 4.10 14.0 Mb
show_coex_graph 1.60 9.8 Mb
sce_dummy 0.52 8.2 Mb

Combined

>

Prepare

Combined DE table
de_tables <- list(
  a = de$ab_other_vs_375$table %>% mutate(name = "AB: Other/375_VLMC"),
  b = de$bh_fb2_vs_fb1$table %>% mutate(name = "Bh: FB2/FB1"),
  c1 = de$cb_other_vs_vlmc3$table %>% mutate(name = "CB: Other/VLMC3"),
  c2 = de$cb_eae_vs_ctrl$table %>% mutate(name = "CB': EAE/Ctrl"),
  d = de$dm_eae_vs_hthy$table %>% mutate(name = "Dm: EAE/Healthy")
)
de_tables %>%
  dual(rbind) %>%
  slice_sample(n = 7)
## # A tibble: 7 x 7
##   symbol         base    lfc     p src    color     name           
## * <chr>         <dbl>  <dbl> <dbl> <chr>  <chr>     <chr>          
## 1 Tbx3os2        2.41  0     1     edgeR  #7E801BFF CB': EAE/Ctrl  
## 2 Mpi            1.24  0.597 1     DESeq2 #B27634FF CB': EAE/Ctrl  
## 3 D230025D16Rik  4.42 -0.216 1     edgeR  #4263F3FF Bh: FB2/FB1    
## 4 Tril           7.14 -0.877 0.560 edgeR  #4F76F1FF Bh: FB2/FB1    
## 5 Stfa3          9.27  0     1     edgeR  #35DE12FF Dm: EAE/Healthy
## 6 Hnrnpl         2.63  0.278 1     DESeq2 #9D7A2AFF CB': EAE/Ctrl  
## 7 Tent5b         1.57  3.36  1     DESeq2 #E56C4CFF CB': EAE/Ctrl
Combined SCE dataset with reduced geneset
datasets$combo <-
  list(
    ab = data.frame(
      datasets$ab@assays@data$counts %>% take_rows(get_goi_genes()) %>% t(),
      group = "AB",
      subgroup = datasets$ab$celltype
    ),
    bh = data.frame(
      datasets$bh@assays@data$counts %>% take_rows(get_goi_genes()) %>% t(),
      group = "Bh",
      subgroup = datasets$bh$celltype
    ),
    cb = data.frame(
      datasets$cb@assays@data$counts %>% take_rows(get_goi_genes()) %>% t(),
      group = "CB",
      subgroup = datasets$cb$Group
    ),
    dm = data.frame(
      datasets$dm@assays@data$counts %>% take_rows(get_goi_genes()) %>% t(),
      group = "Dm",
      subgroup = datasets$dm$subgroup
    ) %>% {
      (.)[sample(rownames(.), size = min(nrow(.), 777)), ]
    }
  ) %>%
  lapply(function(ds) {
    ds[, Reduce(intersect, lapply(., colnames))]
  }) %>%
  dual(rbind) %>%
  {
    make_sce(
      (.) %>% select(-group, -subgroup) %>% t(),
      (.) %>% select(group, subgroup)
    )
  }
print(datasets$combo)
## class: SingleCellExperiment 
## dim: 333 1017 
## metadata(0):
## assays(1): counts
## rownames(333): Fos Fosb ... Tpm2 Actb
## rowData names(0):
## colnames(1017): ab.CAGAATCTCTGGGCCA-L8TX_181011_01_A03
##   ab.CGTCAGGTCTGTCTAT-L8TX_180115_01_G11 ... dm.EAE1_CTTCTCTTCAAAGTAG
##   dm.healthy2_CCTACCATCAGCGACC
## colData names(2): group subgroup
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):

Compare LFC

LFC x LFC

Compare the gene LFC (edgeR) across datasets. Each dot is a gene with x-coordinate as the LFC in one dataset and y-coordinate as the LFC in another.

de_tables %>%
  lapply(function(t) {
    t %>%
      group_by(name, src) %>%
      mutate(lfc = (lfc - mean(lfc)) / sd(lfc)) %>%
      ungroup() %>%
      filter(src == "edgeR")
  }) %>%
  with({
    rbind(
      merge(a, b, by = "symbol"),
      merge(a, c1, by = "symbol"),
      merge(a, c2, by = "symbol"),
      merge(a, d, by = "symbol"),
      merge(b, c1, by = "symbol"),
      merge(b, c2, by = "symbol"),
      merge(b, d, by = "symbol"),
      merge(c1, d, by = "symbol"),
      merge(c2, d, by = "symbol")
    )
  }) %>%
  mutate(goi_set = symbol2goiset(symbol)) %>%
  {
    ggplot(., aes(x = lfc.x, y = lfc.y)) +

      # Background
      geom_point(alpha = 0.05, color = "black", size = 0.7) +
      geom_smooth(formula = y ~ x, method = "lm", size = 0.2, color = "gray") +

      # Foreground
      geom_point(
        data = filter(., goi_set != "Other"),
        aes(color = goi_set),
        stroke = 0, size = 1, alpha = 0.7
      ) +

      # geom_smooth(data = filter(., goi_set != "Other"), formula = y ~ x, method = 'lm', size = 0.2) +

      labs(x = "LFC", y = "LFC", color = "") +

      # scale_color_brewer(palette = "Set1") +

      facet_grid(
        cols = vars(name.x), rows = vars(name.y),
        scale = "free"
      ) +
      theme(
        legend.position = "bottom",
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        strip.text = element_text(size = 10),
        axis.title = element_blank()
      ) +
      guides(color = guide_legend(override.aes = list(size = 3, alpha = 1)))
  }

Top DE

The following table shows the top genes that are DE (one way or another according to edgeR) simultaneously in several datasets.

make_common_de_table <- function(de_tables, slicer = slice_max) {
  de_tables %>%
    
    lapply(function(t) {
      t %>%
        group_by(name, src) %>%
        mutate(norm.lfc = ((lfc - mean(lfc)) / sd(lfc))) %>%
        slicer(abs(norm.lfc), n = 777) %>%
        ungroup() %>%
        filter(src == "edgeR")
    }) %>%
    
    dual(rbind) %>%
    as.data.frame() %>%
    
    select(symbol, norm.lfc, name) %>%
    stats::reshape(direction = "wide", idvar = "symbol", timevar = "name") %>%
    # Commonly-DE first
    arrange(desc(rowSums(!is.na(across(where(is.numeric)))))) %>%
    
    # Signature of NAs
    rowwise() %>%
    mutate(signature = paste(across(where(is.numeric), ~ ifelse(is.na(.x), "N", "Y")), collapse = "")) %>%
    ungroup() %>%
    
    # Role: TF, Rec, Lig, etc
    mutate(role = symbol2role(symbol)) %>%
    # Reset rownames
    as.data.frame(row.names = 1:nrow(.)) %>%
    # Formatting
    mutate(symbol = as.link(symbol)) %>%
    mutate(across(where(is.numeric), signif, 3)) %>%
    DT::datatable(escape = FALSE, rownames = TRUE)
}
de_tables %>% make_common_de_table(slicer = slice_max)
Up/down in EAE

Top genes that are DE (edgeR) in EAE.

list(
  cb = de$cb_eae_vs_ctrl$table,
  dm = de$dm_eae_vs_hthy$table
) %>%
  
  lapply(. %>% filter(src == "edgeR")) %>%
  lapply(. %>% filter((lfc < quantile(lfc, 0.03)) | (quantile(lfc, 0.97) < lfc))) %>%
  
  with(merge(
    cb %>% select(symbol, lfc.cb = lfc),
    dm %>% select(symbol, lfc.dm = lfc),
    by = "symbol"
  )) %>%
  
  ggplot(aes(x = lfc.cb, y = lfc.dm)) + 
  
  labs(x = "Healthy <- LFC Daneman -> EAE") +
  labs(y = "Healthy <- LFC Castelo-Branco -> EAE") +
  
  geom_hline(yintercept = 0, alpha = 0.2) +
  geom_vline(xintercept = 0, alpha = 0.2) +
  
  geom_point(size = 1, alpha = 0.2) +
  
  ggrepel::geom_text_repel(
    aes(label = symbol),
    hjust = 1.1, size = 4, show.legend = FALSE,
    max.overlaps = Inf, point.size = NA,
    segment.alpha = 0.2
  ) 

Agg. heatmaps

Prepare

Stack of grouped datasets in long form (reduced gene set)
datasets$grouped <-
  list(
    list(ds = datasets$ab, name = "AB", group.by = "celltype"),
    list(ds = datasets$bh, name = "Bh", group.by = "celltype"),
    list(ds = datasets$cb, name = "CB", group.by = "celltype"),
    list(ds = datasets$cb, name = "CB", group.by = "Group"),
    list(ds = datasets$dm, name = "Dm", group.by = "cluster"),
    list(ds = datasets$dm, name = "Dm", group.by = "subgroup")
  ) %>%
  lapply(dual(
    function(ds, name, group.by) {
      ds %>%
        # Subset genes
        take_rows(
          unique(goi %>% lapply(as.data.frame) %>% dual(rbind) %>% pull(genes))
        ) %>%
        scuttle::aggregateAcrossCells(ids = ds[[group.by]], use.assay.type = "logcounts", statistics = "mean") %>%
        SingleCellExperiment::logcounts() %>%
        as.data.frame() %>%
        ind2col("symbol") %>%
        reshape2::melt(id.vars = "symbol", variable.name = "subgroup", value.name = "expression") %>%
        mutate(dataset = name, subdataset = paste0(name, " (", group.by, ")"), group = group.by) %>%
        mutate(role = symbol2role(symbol))

      # TODO: add description?
    }
  )) %>%
  dual(rbind)
Symbols to highlight
interesting <- c(
  # Daneman clusters
  c("Rbp1", "Fn1", "Igfbp7"),

  # Aerobic
  c("Cox3b", "Cox7c", "Bloc1s1"),

  # Anaerobic glycolysis
  c("Ier3"),

  # Collagen
  c("Col4a1", "Col4a2", "Col8a1", "Col3a1"),

  # Fib. migration
  c("Sdc4")
)
Tiled heatmap plotter
show_stack <- function(ds) {
  ds$role

  ds %>%
    mutate(role = if_else(role == "", "Unknown", role)) %>%
    
    group_by(subdataset) %>%
    mutate(expression = 100 * expression / max(expression)) %>%
    ungroup() %>%
    
    mutate(
      symbol = if_else(symbol %in% interesting, glue("<span style='color:red;'>{symbol}</span>"), symbol)
    ) %>%
    mutate(color = c(ramp(0:7), colors)[as.character(subgroup)]) %>%
    mutate(subgroup = glue("<span name='{subgroup}' style='color:{color};'>{subgroup}</span>")) %>%
    # Append role to symbol
    # mutate(symbol = paste0(symbol, " (", role, ")")) %>%

    # Heatmaps
    ggplot(aes(x = subgroup, y = symbol, fill = expression)) +
    geom_tile() +
    scale_y_discrete(limits = rev) +
    scale_fill_gradient(low = "black", high = "yellow") +
    labs(fill = "%") +
    theme(
      axis.text.x = ggtext::element_markdown(
        size = 12, angle = 90, vjust = 0.5, hjust = 1
      ),
      axis.title = element_blank(),
      legend.title = element_text(size = 9),
      legend.text = element_text(size = 9),

      # Remove the `role` strip altogether
      # strip.text.y = element_blank(),

      strip.text.y = element_text(color = "black", angle = 0, size = 8),
      strip.text.x = element_text(color = "black", size = 10),
    ) +
    facet_grid(
      cols = vars(subdataset),
      rows = vars(role),
      space = "free",
      scales = "free",
      labeller = label_wrap_gen(5)
    ) +
    theme(axis.text.y = ggtext::element_markdown())
}

Daneman clusters

Click on the plot to scroll through the clusters (shift-click to cycle back).

(0:7) %>%
  lapply(function(i) {
    datasets$grouped %>%
      filter(symbol %in% goi[[paste0("c", i)]]$genes) %>%
      show_stack() +
      ggtitle(paste0("Cluster #", i)) +
      theme(axis.text.y = ggtext::element_markdown(size = 12))
  })

Inflammation

datasets$grouped %>%
  filter(symbol %in% goi$neuroinflammation$genes) %>%
  show_stack() +
  theme(axis.text.y = ggtext::element_markdown(size = 12))

Col-

datasets$grouped %>%
  filter(symbol %in% goi$col$genes) %>%
  show_stack() +
  theme(axis.text.y = ggtext::element_markdown(size = 9))

Aerobic

datasets$grouped %>%
  filter(symbol %in% goi$air$genes) %>%
  show_stack() +
  theme(axis.text.y = ggtext::element_markdown(size = 7))

Anaerobic

datasets$grouped %>%
  filter(symbol %in% goi$glycolysis$genes) %>%
  show_stack() +
  theme(axis.text.y = ggtext::element_markdown(size = 6))

Fib. migration

datasets$grouped %>%
  filter(symbol %in% goi$fib_mig$genes) %>%
  show_stack() +
  theme(axis.text.y = ggtext::element_markdown(size = 10))

Geneset-PCA

For some of the gene sets, the following tabs show reduced dimension plots (PCA/TSNE/UMAP) once the combined dataset is restricted to that gene set.

Prepare

Plotting 2d dimension reduction
colors # required below
##         374_VLMC         375_VLMC         376_VLMC              FB1 
##    "aquamarine3"    "chartreuse4"    "chartreuse3" "cornflowerblue" 
##              FB2             Ctrl              EAE            VLMC1 
##           "blue"    "chartreuse4"         "coral2"    "chartreuse1" 
##            VLMC3            VLMC4          healthy             EAE1 
##         "coral3"         "coral4"          "green"         "coral1" 
##             EAE2         healthy1         healthy2         healthy3 
##         "coral3"     "aquamarine"    "chartreuse3"    "chartreuse4" 
##         positive         negative             G1.S                S 
##           "Red4"     "Steelblue1"        "#E16A86"        "#B88A00" 
##               G2             G2.M             M.G1 
##        "#50A315"        "#00AD9A"        "#009ADE"
common_map <- function(sce, dimred = NULL) {
  sce %<>% drop_empty()

  if (is.null(dimred)) {
    sce %<>% norm1() %>% scater_attach()
    return(list(
      PCA = common_map(sce, dimred = "PCA"),
      TSNE = common_map(sce, dimred = "TSNE"),
      UMAP = common_map(sce, dimred = "UMAP")
    ))
  }

  sce %>%
    {
      cbind(
        .@colData %>% as.data.frame(),
        list(
          TSNE = .@int_colData$reducedDims$TSNE %>%
            as.data.frame() %>%
            select(x = V1, y = V2),
          UMAP = .@int_colData$reducedDims$UMAP %>%
            as.data.frame() %>%
            select(x = V1, y = V2),
          PCA = .@int_colData$reducedDims$PCA %>%
            as.data.frame() %>%
            select(x = PC1, y = PC2)
        )[[dimred]]
      )
    } %>%
    group_by(group) %>%
    arrange(subgroup) %>%
    mutate(rk = as.factor(dense_rank(subgroup))) %>%
    ungroup() %>%
    mutate(color = colors[as.character(subgroup)]) %>%
    mutate(subgroup = paste0(group, ": ", subgroup)) %>%
    {
      ggplot((.), aes(x = x, y = y, color = subgroup)) +
        geom_point(data = ((.) %>% select(-group)), alpha = 0.05, size = 1) +
        geom_point(alpha = 0.5, size = 1.5) +
        facet_wrap(. ~ group) +
        scale_color_manual(values = (select(., color, subgroup) %>% pull(color, subgroup))) +
        ggplot2::ggtitle(dimred) +
        theme(
          axis.title = element_blank(),
          axis.ticks = element_blank(),
          axis.text = element_blank(),
          plot.title = element_text(),
        ) +
        guides(color = guide_legend(override.aes = list(size = 3, alpha = 1)))
    }
}
Plotting 3d PCA
common_map_3d <- function(sce, color.by = "subgroup") {
  sce %<>%
    drop_empty() %>%
    norm1() %>%
    scater::logNormCounts() %>%
    scater::runPCA()

  data.frame(
    color_group = sce@colData[[color.by]],
    lib.size = sce@assays@data$counts %>% colSums(),
    SingleCellExperiment::reducedDim(sce, "PCA") %>%
      as.data.frame() %>%
      select(PC1, PC2, PC3)
  ) %>%
    group_by(color_group) %>%
    sample(size = 33, replace = TRUE) %>%
    ungroup() %>%
    # plotly::plot_ly(type = "scatter3d", mode = "markers",

    plotly::plot_ly() %>%
    plotly::add_markers(
      x = ~PC1, y = ~PC2, z = ~PC3,
      marker = list(size = 2, opacity = 1),
      color = ~color_group, colors = colors
    ) %>%
    plotly::layout(
      scene = list(
        xaxis = list(title = "PC1", showticklabels = FALSE),
        yaxis = list(title = "PC2", showticklabels = FALSE),
        zaxis = list(title = "PC3", showticklabels = FALSE)
      )
    )
}

All GoI

datasets$combo %>% common_map()

datasets$combo %>% common_map_3d()

Inflammation

geneset <- goi$neuroinflammation$genes
datasets$combo %>%
  take_rows(geneset) %>%
  common_map()

datasets$combo %>%
  take_rows(geneset) %>%
  common_map_3d()

Col-

geneset <- goi$col$genes
datasets$combo %>%
  take_rows(geneset) %>%
  common_map()

datasets$combo %>%
  take_rows(geneset) %>%
  common_map_3d()

Aerobic

geneset <- goi$air$genes
datasets$combo %>%
  take_rows(geneset) %>%
  common_map()

datasets$combo %>%
  take_rows(geneset) %>%
  common_map_3d()

Anaerobic

geneset <- goi$glycolysis$genes
datasets$combo %>%
  take_rows(geneset) %>%
  common_map()

datasets$combo %>%
  take_rows(geneset) %>%
  common_map_3d()

Fib. migration

geneset <- goi$fib_mig$genes
datasets$combo %>%
  take_rows(geneset) %>%
  common_map()

datasets$combo %>%
  take_rows(geneset) %>%
  common_map_3d()

Kidney

For each dataset we compare the most-DE genes to those from the kidney fibroblast activation gene set SI7 (see above). The LFCs are first standardized and filtered to top absolute LFC.

Prepare

de_tables_vs_si7 <-
  de_tables %>%
  dual(rbind) %>%
  group_by(name, src) %>%
  mutate(norm.lfc = (lfc - mean(lfc)) / sd(lfc)) %>%
  filter(abs(norm.lfc) >= quantile(abs(norm.lfc), 0.95)) %>%
  ungroup() %>%
  merge(
    y = fb_genes_si7 %>%
      select(lfc = logFC) %>%
      ind2col("symbol") %>%
      mutate(norm.lfc = (lfc - mean(lfc)) / sd(lfc)) %>%
      filter(abs(norm.lfc) >= quantile(abs(norm.lfc), 0.95)) %>%
      rename(norm.lfc.ref = norm.lfc),
    by = "symbol",
    all = TRUE
  ) %>%
  mutate(delta = norm.lfc - norm.lfc.ref)

Plot Dataset vs Kidney

We only keep the LFC from edgeR.

de_tables_vs_si7 %>%
  tidyr::drop_na() %>%
  filter(src == "edgeR") %>%
  group_by(name, src) %>%
  mutate(is_out = FALSE) %>%
  mutate(
    is_out = is_out |
      symbol %in% (slice_min(., norm.lfc, n = 5) %>% pull(symbol)) |
      symbol %in% (slice_max(., norm.lfc, n = 5) %>% pull(symbol))
  ) %>%
  mutate(
    is_out = is_out |
      symbol %in% (slice_min(., norm.lfc.ref, n = 5) %>% pull(symbol)) |
      symbol %in% (slice_max(., norm.lfc.ref, n = 5) %>% pull(symbol))
  ) %>%
  mutate(label = if_else(is_out, symbol, "")) %>%
  ungroup() %>%
  ggplot(aes(x = norm.lfc, y = norm.lfc.ref, color = name)) +
  ggrepel::geom_text_repel(
    aes(label = label),
    hjust = 1.1, size = 3, show.legend = FALSE,
    max.overlaps = Inf, point.size = NA,
    segment.alpha = 0.2
  ) +
  geom_point(size = 1, alpha = 0.3) +
  geom_hline(yintercept = 0, alpha = 0.2) +
  geom_vline(xintercept = 0, alpha = 0.2) +
  labs(x = "Standardized LFC in Dataset", y = "Standardized LFC in Reference", color = "Dataset") +
  scale_color_brewer(palette = "Set1") +
  theme(
    legend.title = element_text(size = 9),
    legend.text = element_text(size = 9),
    legend.position = "bottom"
  ) +
  guides(color = guide_legend(override.aes = list(alpha = 1, size = 3)))

Table

The genes that have a very different LFC (between two conditions within a dataset) from the set SI7 (see data sources) are the one with the most positive or most negative “delta”.

de_tables_vs_si7 %>%
  tidyr::drop_na() %>%
  select(symbol, dataset = name, src, norm.lfc, norm.lfc.ref, delta) %>%
  mutate(symbol = as.link(symbol)) %>%
  DT::datatable(escape = FALSE)

Clean up.

gc()
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells   9610141  513.3   16351961  873.3  16351961  873.3
## Vcells 146057741 1114.4  294956956 2250.4 294952040 2250.4

RAM usage.

culprits() %>%
  head(10) %>%
  kable()
size cumsize unit
datasets 470.0 690 Mb
omnipath.tf 66.0 220 Mb
omnipath.in 41.0 150 Mb
gene.summary 39.0 110 Mb
de_tables 19.0 73 Mb
de 17.0 54 Mb
go_info 16.0 37 Mb
go 5.8 22 Mb
omnipath.lr 4.1 16 Mb
show_coex_graph 1.6 12 Mb